[{"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.45038004869326126, "res": {"No": 0.5495966463917007, "Yes": 0.45038004869326126}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.49732749312586405, "res": {"No": 0.5026449816331517, "Yes": 0.49732749312586405}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3847606986084898, "res": {"No": 0.6152118493525539, "Yes": 0.3847606986084898}, "ground_truth": 1}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40520541618560096, "res": {"No": 0.5947591174678811, "Yes": 0.40520541618560096}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30337940350047654, "res": {"No": 0.6965795070610364, "Yes": 0.30337940350047654}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.34683767634390056, "res": {"No": 0.6531399331090036, "Yes": 0.34683767634390056}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37905356554294695, "res": {"No": 0.6209210740831815, "Yes": 0.37905356554294695}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41587497697861464, "res": {"No": 0.5841010151292257, "Yes": 0.41587497697861464}, "ground_truth": 1}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42490298784840164, "res": {"No": 0.5750713838868937, "Yes": 0.42490298784840164}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5119863778798425, "res": {"Yes": 0.5119863778798425, "No": 0.4879876406434596}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44171610484328677, "res": {"No": 0.5582590204225658, "Yes": 0.44171610484328677}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4517909798796561, "res": {"No": 0.5481737939875869, "Yes": 0.4517909798796561}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4992200242755215, "res": {"No": 0.5007506281593258, "Yes": 0.4992200242755215}, "ground_truth": 1}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39998205315228574, "res": {"No": 0.5999896943481008, "Yes": 0.39998205315228574}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4522569894451296, "res": {"No": 0.5477186370103919, "Yes": 0.4522569894451296}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4004155284301513, "res": {"No": 0.5995697046284925, "Yes": 0.4004155284301513}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31345207544149495, "res": {"No": 0.6865318706606999, "Yes": 0.31345207544149495}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40706386068012673, "res": {"No": 0.5929119341706871, "Yes": 0.40706386068012673}, "ground_truth": 1}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48645678970278644, "res": {"No": 0.5135185939731829, "Yes": 0.48645678970278644}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39422187250346274, "res": {"No": 0.6057641863983098, "Yes": 0.39422187250346274}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31862605665333343, "res": {"No": 0.6813315048596789, "Yes": 0.31862605665333343}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.45737723457631885, "res": {"No": 0.5425908186113945, "Yes": 0.45737723457631885}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.26984439525143916, "res": {"No": 0.730125435608522, "Yes": 0.26984439525143916}, "ground_truth": 1}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3287659761555724, "res": {"No": 0.6712021101414362, "Yes": 0.3287659761555724}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.430451918037033, "res": {"No": 0.569515915523433, "Yes": 0.430451918037033}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5031403459537145, "res": {"Yes": 0.5031403459537145, "No": 0.49683395662709956}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40904275211643765, "res": {"No": 0.5909232704727673, "Yes": 0.40904275211643765}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5331872517016897, "res": {"Yes": 0.5331872517016897, "No": 0.46677641719207646}, "ground_truth": 1}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4673888792267866, "res": {"No": 0.5325809362449776, "Yes": 0.4673888792267866}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4550080353851423, "res": {"No": 0.5449702352666147, "Yes": 0.4550080353851423}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3959448678583277, "res": {"No": 0.6040283168163384, "Yes": 0.3959448678583277}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5572442083726765, "res": {"Yes": 0.5572442083726765, "No": 0.4427063745912318}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5558670400110839, "res": {"Yes": 0.5558670400110839, "No": 0.4441040931482627}, "ground_truth": 1}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3559507145462283, "res": {"No": 0.6438356868986563, "Yes": 0.3559507145462283}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44913523277776857, "res": {"No": 0.5508222924990119, "Yes": 0.44913523277776857}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2927847446082804, "res": {"No": 0.7071987748892962, "Yes": 0.2927847446082804}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.21604931222843105, "res": {"No": 0.7839371403273967, "Yes": 0.21604931222843105}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3540320971653567, "res": {"No": 0.6459505797893655, "Yes": 0.3540320971653567}, "ground_truth": 1}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.28122416147358087, "res": {"No": 0.7187600315707317, "Yes": 0.28122416147358087}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.349426758524582, "res": {"No": 0.6505504033623669, "Yes": 0.349426758524582}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4278325749305631, "res": {"No": 0.5721506489434333, "Yes": 0.4278325749305631}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3351341096343282, "res": {"No": 0.6648520993864785, "Yes": 0.3351341096343282}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42327763403490887, "res": {"No": 0.5767076173119621, "Yes": 0.42327763403490887}, "ground_truth": 1}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38451014710290116, "res": {"No": 0.6154764842608521, "Yes": 0.38451014710290116}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2214324061892689, "res": {"No": 0.7785476360881001, "Yes": 0.2214324061892689}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44832096650431424, "res": {"No": 0.5516630145566498, "Yes": 0.44832096650431424}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3875330281056108, "res": {"No": 0.6124501362998088, "Yes": 0.3875330281056108}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3938139615511131, "res": {"No": 0.6061689720581822, "Yes": 0.3938139615511131}, "ground_truth": 1}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33002700319687844, "res": {"No": 0.6699539033058969, "Yes": 0.33002700319687844}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44697893980888603, "res": {"No": 0.5530054069617037, "Yes": 0.44697893980888603}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2535516573961252, "res": {"No": 0.7464303562940185, "Yes": 0.2535516573961252}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36529011700827474, "res": {"No": 0.6346970249584392, "Yes": 0.36529011700827474}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5830060589357329, "res": {"Yes": 0.5830060589357329, "No": 0.4169752381006739}, "ground_truth": 1}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3662002368368969, "res": {"No": 0.6337856330103437, "Yes": 0.3662002368368969}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.334054441762318, "res": {"No": 0.6659273489850823, "Yes": 0.334054441762318}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2974676795697192, "res": {"No": 0.702513992375749, "Yes": 0.2974676795697192}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3844842319923355, "res": {"No": 0.6154854949025395, "Yes": 0.3844842319923355}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.36771282952497875, "res": {"No": 0.6322572288287466, "Yes": 0.36771282952497875}, "ground_truth": 1}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4643943293197832, "res": {"No": 0.5355789862346766, "Yes": 0.4643943293197832}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2573402087608016, "res": {"No": 0.7426216376290271, "Yes": 0.2573402087608016}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44912164664246834, "res": {"No": 0.5508508258327705, "Yes": 0.44912164664246834}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4859556569927464, "res": {"No": 0.5140189739030583, "Yes": 0.4859556569927464}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49827857540170056, "res": {"No": 0.5016906922930421, "Yes": 0.49827857540170056}, "ground_truth": 1}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4778637303903034, "res": {"No": 0.5221108799284763, "Yes": 0.4778637303903034}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44214396821333074, "res": {"No": 0.5578211431340638, "Yes": 0.44214396821333074}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39579202305343597, "res": {"No": 0.6041856866894051, "Yes": 0.39579202305343597}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40669307235924795, "res": {"No": 0.5932899767068847, "Yes": 0.40669307235924795}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6257011362583532, "res": {"Yes": 0.6257011362583532, "No": 0.3742772672324123}, "ground_truth": 1}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4564382118613056, "res": {"No": 0.543528736920433, "Yes": 0.4564382118613056}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5480575167328948, "res": {"Yes": 0.5480575167328948, "No": 0.45191058930014644}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28205973619449454, "res": {"No": 0.7179083746392911, "Yes": 0.28205973619449454}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3789047972671967, "res": {"No": 0.6210658028132912, "Yes": 0.3789047972671967}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.21019344617953079, "res": {"No": 0.789791028025309, "Yes": 0.21019344617953079}, "ground_truth": 1}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48942646647588334, "res": {"No": 0.5105427517631921, "Yes": 0.48942646647588334}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45096664503872624, "res": {"No": 0.549010341869868, "Yes": 0.45096664503872624}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.48059254868146073, "res": {"No": 0.5193848232256978, "Yes": 0.48059254868146073}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32505908928298405, "res": {"No": 0.6749198944776164, "Yes": 0.32505908928298405}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5864347912114888, "res": {"Yes": 0.5864347912114888, "No": 0.4135396832324154}, "ground_truth": 1}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.432378118180756, "res": {"No": 0.5675833319307322, "Yes": 0.432378118180756}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.24635586073428298, "res": {"No": 0.7536109541981931, "Yes": 0.24635586073428298}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.056544179971065665, "res": {"No": 0.9434252816964556, "Yes": 0.056544179971065665}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3495269535385199, "res": {"No": 0.6504503108726147, "Yes": 0.3495269535385199}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4350533290023303, "res": {"No": 0.5649101426733589, "Yes": 0.4350533290023303}, "ground_truth": 1}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3363165666253722, "res": {"No": 0.6636454055071763, "Yes": 0.3363165666253722}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43902437215967294, "res": {"No": 0.5609383906939767, "Yes": 0.43902437215967294}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19564368654009345, "res": {"No": 0.8043321445498265, "Yes": 0.19564368654009345}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33990935495172886, "res": {"No": 0.660062956417168, "Yes": 0.33990935495172886}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3136747681743026, "res": {"No": 0.6862966845390667, "Yes": 0.3136747681743026}, "ground_truth": 1}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.23355235700643887, "res": {"No": 0.7664169930586398, "Yes": 0.23355235700643887}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27465997524764224, "res": {"No": 0.7253170785593126, "Yes": 0.27465997524764224}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.32561688112197745, "res": {"No": 0.6743676783299629, "Yes": 0.32561688112197745}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.45853713417413333, "res": {"No": 0.541439037322882, "Yes": 0.45853713417413333}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44465465690452555, "res": {"No": 0.5553197527802182, "Yes": 0.44465465690452555}, "ground_truth": 1}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3638509038733055, "res": {"No": 0.6361274771220994, "Yes": 0.3638509038733055}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31999417067337277, "res": {"No": 0.6799909090129193, "Yes": 0.31999417067337277}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.25163712736469335, "res": {"No": 0.7483308544637349, "Yes": 0.25163712736469335}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.380071494704253, "res": {"No": 0.6198821745819085, "Yes": 0.380071494704253}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4539235668817849, "res": {"No": 0.5460387145400056, "Yes": 0.4539235668817849}, "ground_truth": 1}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40704637766276097, "res": {"No": 0.5929113175425963, "Yes": 0.40704637766276097}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2610083708373474, "res": {"No": 0.7389571599588508, "Yes": 0.2610083708373474}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2578332295907932, "res": {"No": 0.7421465860520704, "Yes": 0.2578332295907932}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40960909537418344, "res": {"No": 0.5902737480157217, "Yes": 0.40960909537418344}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5441198031466195, "res": {"Yes": 0.5441198031466195, "No": 0.45585167340618754}, "ground_truth": 1}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5477107827814521, "res": {"Yes": 0.5477107827814521, "No": 0.4522151123868447}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3871717058438961, "res": {"No": 0.6127912251073929, "Yes": 0.3871717058438961}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2980835388651771, "res": {"No": 0.7019010284567533, "Yes": 0.2980835388651771}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3594916244022308, "res": {"No": 0.6404941251337887, "Yes": 0.3594916244022308}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38010418225834225, "res": {"No": 0.6198669380653148, "Yes": 0.38010418225834225}, "ground_truth": 1}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32747610120151804, "res": {"No": 0.672497658026959, "Yes": 0.32747610120151804}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3305925925255652, "res": {"No": 0.6693813977696446, "Yes": 0.3305925925255652}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19714079698832956, "res": {"No": 0.8028434182517947, "Yes": 0.19714079698832956}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2540569054122631, "res": {"No": 0.745918950591807, "Yes": 0.2540569054122631}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35960811855957153, "res": {"No": 0.6403669100161958, "Yes": 0.35960811855957153}, "ground_truth": 1}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32545375537741594, "res": {"No": 0.6745237450669536, "Yes": 0.32545375537741594}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4025148050769958, "res": {"No": 0.5974629532664916, "Yes": 0.4025148050769958}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33169015961173054, "res": {"No": 0.6682890563196393, "Yes": 0.33169015961173054}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38366828533226355, "res": {"No": 0.6163044811421822, "Yes": 0.38366828533226355}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5796267367158975, "res": {"Yes": 0.5796267367158975, "No": 0.4203499921818395}, "ground_truth": 1}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37474267071095974, "res": {"No": 0.6252400831287064, "Yes": 0.37474267071095974}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.26266868337453714, "res": {"No": 0.7373135030259169, "Yes": 0.26266868337453714}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40761382098575916, "res": {"No": 0.5923691643615805, "Yes": 0.40761382098575916}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39504406236940354, "res": {"No": 0.6049309256190324, "Yes": 0.39504406236940354}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4727769340171754, "res": {"No": 0.5271934605430707, "Yes": 0.4727769340171754}, "ground_truth": 1}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42197461111103446, "res": {"No": 0.5779999308190452, "Yes": 0.42197461111103446}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32407328516038814, "res": {"No": 0.6759125738007267, "Yes": 0.32407328516038814}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3440492373567869, "res": {"No": 0.6559313532169638, "Yes": 0.3440492373567869}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4960972514524478, "res": {"No": 0.5038740917887082, "Yes": 0.4960972514524478}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.36509477557341136, "res": {"No": 0.634884186619294, "Yes": 0.36509477557341136}, "ground_truth": 1}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41342270940275727, "res": {"No": 0.5865519139336083, "Yes": 0.41342270940275727}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4290223086032368, "res": {"No": 0.5709448836454706, "Yes": 0.4290223086032368}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3185261511879627, "res": {"No": 0.6814556820278044, "Yes": 0.3185261511879627}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2055280208171281, "res": {"No": 0.7944578314468635, "Yes": 0.2055280208171281}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37812854690363773, "res": {"No": 0.6218514382675756, "Yes": 0.37812854690363773}, "ground_truth": 1}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4552140186324502, "res": {"No": 0.5447616605842958, "Yes": 0.4552140186324502}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3629853150294193, "res": {"No": 0.6369946386547154, "Yes": 0.3629853150294193}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2872604452471576, "res": {"No": 0.7127116488020386, "Yes": 0.2872604452471576}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23752400189575873, "res": {"No": 0.7624507215991212, "Yes": 0.23752400189575873}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47276123808352, "res": {"No": 0.527203108271675, "Yes": 0.47276123808352}, "ground_truth": 1}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35104038680952926, "res": {"No": 0.6489326128171788, "Yes": 0.35104038680952926}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3801479727824242, "res": {"No": 0.619816569724421, "Yes": 0.3801479727824242}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3317839748537187, "res": {"No": 0.6681951148474027, "Yes": 0.3317839748537187}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.11258317468213068, "res": {"No": 0.8873934599192249, "Yes": 0.11258317468213068}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3715001169572545, "res": {"No": 0.628481114456976, "Yes": 0.3715001169572545}, "ground_truth": 1}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45281347194819294, "res": {"No": 0.5471618137075764, "Yes": 0.45281347194819294}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3695993713902537, "res": {"No": 0.6303679389477225, "Yes": 0.3695993713902537}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4260121551090469, "res": {"No": 0.5739600410392499, "Yes": 0.4260121551090469}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.539831497384268, "res": {"Yes": 0.539831497384268, "No": 0.4601431234304822}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.455331733986693, "res": {"No": 0.5446448217395146, "Yes": 0.455331733986693}, "ground_truth": 1}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4535437509954027, "res": {"No": 0.5464305832603199, "Yes": 0.4535437509954027}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4986543542977599, "res": {"No": 0.5013245917306314, "Yes": 0.4986543542977599}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.47473240100651054, "res": {"No": 0.5252353663486068, "Yes": 0.47473240100651054}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39597587155525993, "res": {"No": 0.6039949753734823, "Yes": 0.39597587155525993}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4179825055852893, "res": {"No": 0.5819938781201043, "Yes": 0.4179825055852893}, "ground_truth": 1}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5247534215288165, "res": {"Yes": 0.5247534215288165, "No": 0.4752177019734006}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5379274388190286, "res": {"Yes": 0.5379274388190286, "No": 0.46204319342898403}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3531039815949251, "res": {"No": 0.64686392723033, "Yes": 0.3531039815949251}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.342360672384994, "res": {"No": 0.6576093151279091, "Yes": 0.342360672384994}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43106094291480945, "res": {"No": 0.5688984339487626, "Yes": 0.43106094291480945}, "ground_truth": 1}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41079948688143353, "res": {"No": 0.5891742138690225, "Yes": 0.41079948688143353}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5147082210205227, "res": {"Yes": 0.5147082210205227, "No": 0.48524481606943687}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31032482732905053, "res": {"No": 0.6896370253646082, "Yes": 0.31032482732905053}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3047520145933343, "res": {"No": 0.6952133862263954, "Yes": 0.3047520145933343}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45701051938725273, "res": {"No": 0.5429453547129964, "Yes": 0.45701051938725273}, "ground_truth": 1}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4402627373237177, "res": {"No": 0.5597037797143379, "Yes": 0.4402627373237177}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25932738000870187, "res": {"No": 0.740652621871627, "Yes": 0.25932738000870187}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3179370410254871, "res": {"No": 0.6820465625059959, "Yes": 0.3179370410254871}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.44465839646591493, "res": {"No": 0.5553074803492926, "Yes": 0.44465839646591493}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46989389268988396, "res": {"No": 0.530083307937748, "Yes": 0.46989389268988396}, "ground_truth": 1}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4536444080571773, "res": {"No": 0.5463319341243309, "Yes": 0.4536444080571773}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40326381637883907, "res": {"No": 0.5967064112502046, "Yes": 0.40326381637883907}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.010295681947967407, "res": {"No": 0.9896986333039165, "Yes": 0.010295681947967407}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4291936213738238, "res": {"No": 0.5707884319302166, "Yes": 0.4291936213738238}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4019837958525665, "res": {"No": 0.5979986395703877, "Yes": 0.4019837958525665}, "ground_truth": 1}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49000504108588877, "res": {"No": 0.5099708814958142, "Yes": 0.49000504108588877}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40922510310495513, "res": {"No": 0.5907516321943618, "Yes": 0.40922510310495513}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.534804938234625, "res": {"Yes": 0.534804938234625, "No": 0.4651626730762716}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.48330215075314553, "res": {"No": 0.516663389841151, "Yes": 0.48330215075314553}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.547405223521303, "res": {"Yes": 0.547405223521303, "No": 0.4525425170159055}, "ground_truth": 1}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38433104555031233, "res": {"No": 0.6156370214610883, "Yes": 0.38433104555031233}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5074930818407478, "res": {"Yes": 0.5074930818407478, "No": 0.4924733950658492}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44612687246720445, "res": {"No": 0.5538557805732472, "Yes": 0.44612687246720445}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.476842543534447, "res": {"No": 0.5231380247915424, "Yes": 0.476842543534447}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47576839894316364, "res": {"No": 0.5242054244983286, "Yes": 0.47576839894316364}, "ground_truth": 1}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4557351042476305, "res": {"No": 0.5442369810793404, "Yes": 0.4557351042476305}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5047341620965776, "res": {"Yes": 0.5047341620965776, "No": 0.49524747096452926}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4612426816936959, "res": {"No": 0.5387322794916387, "Yes": 0.4612426816936959}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2670238496604809, "res": {"No": 0.7329439841514651, "Yes": 0.2670238496604809}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.25339944533210496, "res": {"No": 0.7465772981068902, "Yes": 0.25339944533210496}, "ground_truth": 1}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.463855870004555, "res": {"No": 0.5361153156032568, "Yes": 0.463855870004555}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41115751077804696, "res": {"No": 0.5888121538803638, "Yes": 0.41115751077804696}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5696319889987626, "res": {"Yes": 0.5696319889987626, "No": 0.43034261865132445}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4279870246920302, "res": {"No": 0.5719818035979751, "Yes": 0.4279870246920302}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49557580501558907, "res": {"No": 0.5043795091114334, "Yes": 0.49557580501558907}, "ground_truth": 1}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4547636979965826, "res": {"No": 0.5452139241277054, "Yes": 0.4547636979965826}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44238576767855975, "res": {"No": 0.5575729121911494, "Yes": 0.44238576767855975}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.504266893741023, "res": {"Yes": 0.504266893741023, "No": 0.4957033578611683}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4894222084841473, "res": {"No": 0.5105500015217402, "Yes": 0.4894222084841473}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4692249255996071, "res": {"No": 0.5307490597234809, "Yes": 0.4692249255996071}, "ground_truth": 1}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4712583601527909, "res": {"No": 0.5287163295432991, "Yes": 0.4712583601527909}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3833785104075543, "res": {"No": 0.6166064950088593, "Yes": 0.3833785104075543}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24359043721816545, "res": {"No": 0.7563938294636111, "Yes": 0.24359043721816545}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4405529837096072, "res": {"No": 0.5594279137574845, "Yes": 0.4405529837096072}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44292441245844427, "res": {"No": 0.557056281596825, "Yes": 0.44292441245844427}, "ground_truth": 1}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37259758586335906, "res": {"No": 0.6273787209765759, "Yes": 0.37259758586335906}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41550750594509256, "res": {"No": 0.5844671508950233, "Yes": 0.41550750594509256}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24940572273226444, "res": {"No": 0.7505723876461821, "Yes": 0.24940572273226444}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3145497587295788, "res": {"No": 0.6854272722944047, "Yes": 0.3145497587295788}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4734543408711414, "res": {"No": 0.5265279091400573, "Yes": 0.4734543408711414}, "ground_truth": 1}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3883286118696258, "res": {"No": 0.6116460398782962, "Yes": 0.3883286118696258}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42049827577235604, "res": {"No": 0.5794807703083095, "Yes": 0.42049827577235604}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.27692075410190353, "res": {"No": 0.7230612962415118, "Yes": 0.27692075410190353}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31960730776631563, "res": {"No": 0.6803628841448496, "Yes": 0.31960730776631563}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34257341290504395, "res": {"No": 0.6574007019405513, "Yes": 0.34257341290504395}, "ground_truth": 1}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34363081847844845, "res": {"No": 0.6563468467267458, "Yes": 0.34363081847844845}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2478632490557879, "res": {"No": 0.7521163471838029, "Yes": 0.2478632490557879}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23246255980935204, "res": {"No": 0.7675183431013971, "Yes": 0.23246255980935204}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34291368867431843, "res": {"No": 0.6570679573512457, "Yes": 0.34291368867431843}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46649247014186657, "res": {"No": 0.5334883208740543, "Yes": 0.46649247014186657}, "ground_truth": 1}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5175703608963609, "res": {"Yes": 0.5175703608963609, "No": 0.4824092976593627}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2850986266947145, "res": {"No": 0.7148824324129027, "Yes": 0.2850986266947145}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4055318352001161, "res": {"No": 0.5944474561622324, "Yes": 0.4055318352001161}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5219144987570616, "res": {"Yes": 0.5219144987570616, "No": 0.4780576725014681}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4156169650372881, "res": {"No": 0.5843613485395436, "Yes": 0.4156169650372881}, "ground_truth": 1}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5194829806498549, "res": {"Yes": 0.5194829806498549, "No": 0.4804991113505668}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3492898450078721, "res": {"No": 0.6506900720082545, "Yes": 0.3492898450078721}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24181664627875432, "res": {"No": 0.7581722472792409, "Yes": 0.24181664627875432}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34868693281714125, "res": {"No": 0.6512963158862363, "Yes": 0.34868693281714125}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31915554281556624, "res": {"No": 0.6808267775636002, "Yes": 0.31915554281556624}, "ground_truth": 1}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33406085566917315, "res": {"No": 0.665919770774971, "Yes": 0.33406085566917315}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2262823778774991, "res": {"No": 0.7736897697626949, "Yes": 0.2262823778774991}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2722001058217913, "res": {"No": 0.7277814077160898, "Yes": 0.2722001058217913}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.20502197714591666, "res": {"No": 0.7949526943980949, "Yes": 0.20502197714591666}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45349893401637276, "res": {"No": 0.546478725915358, "Yes": 0.45349893401637276}, "ground_truth": 1}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32349419809285096, "res": {"No": 0.6764817356742376, "Yes": 0.32349419809285096}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38298147849182657, "res": {"No": 0.6170014901041536, "Yes": 0.38298147849182657}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40577812861481527, "res": {"No": 0.5941995578878113, "Yes": 0.40577812861481527}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3670643520917082, "res": {"No": 0.6329118842020109, "Yes": 0.3670643520917082}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44274907125279733, "res": {"No": 0.5572373543110661, "Yes": 0.44274907125279733}, "ground_truth": 1}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.19844928553228142, "res": {"No": 0.8015342864447516, "Yes": 0.19844928553228142}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4575572525090023, "res": {"No": 0.542426601226156, "Yes": 0.4575572525090023}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4358041952365236, "res": {"No": 0.5641812578210202, "Yes": 0.4358041952365236}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.29322985190946893, "res": {"No": 0.706747570486339, "Yes": 0.29322985190946893}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5086467302772708, "res": {"Yes": 0.5086467302772708, "No": 0.4913404552129561}, "ground_truth": 1}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5774368460819508, "res": {"Yes": 0.5774368460819508, "No": 0.4225504631115237}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4535202036151463, "res": {"No": 0.5464678729556315, "Yes": 0.4535202036151463}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2727370594838465, "res": {"No": 0.727243274137166, "Yes": 0.2727370594838465}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37397774076870816, "res": {"No": 0.6259807745093854, "Yes": 0.37397774076870816}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5246265252593697, "res": {"Yes": 0.5246265252593697, "No": 0.4753448206987998}, "ground_truth": 1}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42928521248026014, "res": {"No": 0.570691411851128, "Yes": 0.42928521248026014}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42383925974644515, "res": {"No": 0.5761361181338046, "Yes": 0.42383925974644515}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3553958643272306, "res": {"No": 0.6445842136819828, "Yes": 0.3553958643272306}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3003913663553541, "res": {"No": 0.6995867747730543, "Yes": 0.3003913663553541}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39840468253096034, "res": {"No": 0.6015771434522632, "Yes": 0.39840468253096034}, "ground_truth": 1}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33876297667305744, "res": {"No": 0.6612182117057208, "Yes": 0.33876297667305744}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3479825653610149, "res": {"No": 0.6520020844835077, "Yes": 0.3479825653610149}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5010982947678486, "res": {"Yes": 0.5010982947678486, "No": 0.4988734313465209}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5275582464063749, "res": {"Yes": 0.5275582464063749, "No": 0.47241601210823936}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5637371061506127, "res": {"Yes": 0.5637371061506127, "No": 0.4362367274987303}, "ground_truth": 1}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47143280515093916, "res": {"No": 0.5285439432573465, "Yes": 0.47143280515093916}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5207928588850739, "res": {"Yes": 0.5207928588850739, "No": 0.4791869957335406}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3223969914899688, "res": {"No": 0.6775751295850015, "Yes": 0.3223969914899688}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46847634476897804, "res": {"No": 0.5312786663558833, "Yes": 0.46847634476897804}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5440404925836057, "res": {"Yes": 0.5440404925836057, "No": 0.4559365152969015}, "ground_truth": 1}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4720528402406894, "res": {"No": 0.5279123251057174, "Yes": 0.4720528402406894}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3352196132512186, "res": {"No": 0.6647604026604201, "Yes": 0.3352196132512186}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3436634993232353, "res": {"No": 0.6563177843318189, "Yes": 0.3436634993232353}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.43500230023973374, "res": {"No": 0.5649710150206707, "Yes": 0.43500230023973374}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4637070889072215, "res": {"No": 0.5362695567183792, "Yes": 0.4637070889072215}, "ground_truth": 1}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3835394673157657, "res": {"No": 0.6164431467585217, "Yes": 0.3835394673157657}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4754707562212264, "res": {"No": 0.5245123527214242, "Yes": 0.4754707562212264}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36404685394497677, "res": {"No": 0.6359143020025405, "Yes": 0.36404685394497677}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35021711088546914, "res": {"No": 0.6497611923393805, "Yes": 0.35021711088546914}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3704043989743473, "res": {"No": 0.6295673761562434, "Yes": 0.3704043989743473}, "ground_truth": 1}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.436399792720183, "res": {"No": 0.5635478124688572, "Yes": 0.436399792720183}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4080095083786079, "res": {"No": 0.591961073785922, "Yes": 0.4080095083786079}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35385360378964376, "res": {"No": 0.6461301014780656, "Yes": 0.35385360378964376}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2940276407224304, "res": {"No": 0.7059561386336947, "Yes": 0.2940276407224304}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4640747920419366, "res": {"No": 0.5359010195407159, "Yes": 0.4640747920419366}, "ground_truth": 1}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.504008301407393, "res": {"Yes": 0.504008301407393, "No": 0.4959531065364464}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3862997486384967, "res": {"No": 0.6136777289777092, "Yes": 0.3862997486384967}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38111355638820155, "res": {"No": 0.6188609116569869, "Yes": 0.38111355638820155}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39767319066149154, "res": {"No": 0.6022653801480242, "Yes": 0.39767319066149154}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39356876664056634, "res": {"No": 0.6064019372412649, "Yes": 0.39356876664056634}, "ground_truth": 1}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38202629028810803, "res": {"No": 0.6179445811870823, "Yes": 0.38202629028810803}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3376762104101299, "res": {"No": 0.6623003533313182, "Yes": 0.3376762104101299}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2397259219703129, "res": {"No": 0.7602404987777213, "Yes": 0.2397259219703129}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27379084595881686, "res": {"No": 0.726182673076092, "Yes": 0.27379084595881686}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42952065839789194, "res": {"No": 0.5704524848512473, "Yes": 0.42952065839789194}, "ground_truth": 1}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3589318648538488, "res": {"No": 0.6410293854349397, "Yes": 0.3589318648538488}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3774139436959778, "res": {"No": 0.6225626083736677, "Yes": 0.3774139436959778}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5589822496991388, "res": {"Yes": 0.5589822496991388, "No": 0.44099197543207297}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5065458683328854, "res": {"Yes": 0.5065458683328854, "No": 0.49343294782921854}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4856115764714002, "res": {"No": 0.5143687645529887, "Yes": 0.4856115764714002}, "ground_truth": 1}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5458959167894075, "res": {"Yes": 0.5458959167894075, "No": 0.45408279033492266}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5643570276575622, "res": {"Yes": 0.5643570276575622, "No": 0.4356257830224175}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.45698393585852937, "res": {"No": 0.5429914255808728, "Yes": 0.45698393585852937}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31737202905297696, "res": {"No": 0.6826081110491952, "Yes": 0.31737202905297696}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5062968609529074, "res": {"Yes": 0.5062968609529074, "No": 0.4936740980757356}, "ground_truth": 1}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4603994485030124, "res": {"No": 0.539576179694132, "Yes": 0.4603994485030124}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5382494847537946, "res": {"Yes": 0.5382494847537946, "No": 0.46172410112395423}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36160757576718183, "res": {"No": 0.6383705275695643, "Yes": 0.36160757576718183}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3518315391152267, "res": {"No": 0.6481425125730214, "Yes": 0.3518315391152267}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4891847495104503, "res": {"No": 0.5107979337281522, "Yes": 0.4891847495104503}, "ground_truth": 1}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3499214423919963, "res": {"No": 0.6500613928592075, "Yes": 0.3499214423919963}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33455693654908036, "res": {"No": 0.665424630501521, "Yes": 0.33455693654908036}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21195989430777812, "res": {"No": 0.7880298650758418, "Yes": 0.21195989430777812}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4272923241936741, "res": {"No": 0.5726829335550927, "Yes": 0.4272923241936741}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46725400558622987, "res": {"No": 0.5327248963477887, "Yes": 0.46725400558622987}, "ground_truth": 1}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31728393880520395, "res": {"No": 0.6826894691319019, "Yes": 0.31728393880520395}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3518561329993571, "res": {"No": 0.6481259981121961, "Yes": 0.3518561329993571}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28594808660652277, "res": {"No": 0.7140345417582831, "Yes": 0.28594808660652277}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.15706356232381968, "res": {"No": 0.8429168766947985, "Yes": 0.15706356232381968}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3712638288987696, "res": {"No": 0.6287058366220879, "Yes": 0.3712638288987696}, "ground_truth": 1}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38604142369621885, "res": {"No": 0.6139192341251448, "Yes": 0.38604142369621885}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.19869535632088442, "res": {"No": 0.8012846874620575, "Yes": 0.19869535632088442}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11577164565498245, "res": {"No": 0.8842088266501292, "Yes": 0.11577164565498245}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.24661210546003234, "res": {"No": 0.7533739506841562, "Yes": 0.24661210546003234}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4388945499000197, "res": {"No": 0.5610881251316246, "Yes": 0.4388945499000197}, "ground_truth": 1}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44727876652142967, "res": {"No": 0.5526970265609901, "Yes": 0.44727876652142967}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3251608161863648, "res": {"No": 0.6748221056698768, "Yes": 0.3251608161863648}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2529299768264136, "res": {"No": 0.7470569936795478, "Yes": 0.2529299768264136}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5353208672479901, "res": {"Yes": 0.5353208672479901, "No": 0.4646541563029832}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4945483782724629, "res": {"No": 0.50542526255913, "Yes": 0.4945483782724629}, "ground_truth": 1}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48650762709351775, "res": {"No": 0.513466495019663, "Yes": 0.48650762709351775}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4498151762637414, "res": {"No": 0.5501651352012071, "Yes": 0.4498151762637414}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31840637602763167, "res": {"No": 0.6815542685373569, "Yes": 0.31840637602763167}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.24331635741241586, "res": {"No": 0.7566531051837945, "Yes": 0.24331635741241586}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3585130564286218, "res": {"No": 0.64146346436077, "Yes": 0.3585130564286218}, "ground_truth": 1}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2922731700980403, "res": {"No": 0.7077089552219707, "Yes": 0.2922731700980403}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.24889362067733084, "res": {"No": 0.7510856843883735, "Yes": 0.24889362067733084}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1253605241230285, "res": {"No": 0.8746289235031485, "Yes": 0.1253605241230285}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.44806603404823037, "res": {"No": 0.5519122294318871, "Yes": 0.44806603404823037}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41103013335829824, "res": {"No": 0.5889576850097655, "Yes": 0.41103013335829824}, "ground_truth": 1}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40826341006170025, "res": {"No": 0.5917174549929612, "Yes": 0.40826341006170025}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3051591607379488, "res": {"No": 0.6948142259082666, "Yes": 0.3051591607379488}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19322578565236087, "res": {"No": 0.8067599618508952, "Yes": 0.19322578565236087}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3165697444698126, "res": {"No": 0.6834105512997184, "Yes": 0.3165697444698126}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4137515938884152, "res": {"No": 0.5862207405363785, "Yes": 0.4137515938884152}, "ground_truth": 1}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3599090566005342, "res": {"No": 0.6400708280011226, "Yes": 0.3599090566005342}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3139267501703781, "res": {"No": 0.6860440777777999, "Yes": 0.3139267501703781}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36914990530453284, "res": {"No": 0.6308190024380768, "Yes": 0.36914990530453284}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35328071904333697, "res": {"No": 0.6466990626281652, "Yes": 0.35328071904333697}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4805949324264139, "res": {"No": 0.5193685667351462, "Yes": 0.4805949324264139}, "ground_truth": 1}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3290203421121676, "res": {"No": 0.6709570184656855, "Yes": 0.3290203421121676}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.6344215627732976, "res": {"Yes": 0.6344215627732976, "No": 0.36555106384080804}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40234459743738643, "res": {"No": 0.5976330751849925, "Yes": 0.40234459743738643}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4392059243151255, "res": {"No": 0.5607765833100988, "Yes": 0.4392059243151255}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.510986248550814, "res": {"Yes": 0.510986248550814, "No": 0.48899001937090775}, "ground_truth": 1}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5038311937747277, "res": {"Yes": 0.5038311937747277, "No": 0.4961468140434781}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44067792462695404, "res": {"No": 0.559297392076645, "Yes": 0.44067792462695404}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28268247718077677, "res": {"No": 0.71729017008038, "Yes": 0.28268247718077677}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3286253930134187, "res": {"No": 0.6713503346435004, "Yes": 0.3286253930134187}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4760547549303671, "res": {"No": 0.5239209839605838, "Yes": 0.4760547549303671}, "ground_truth": 1}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2897960729744993, "res": {"No": 0.7101856743261167, "Yes": 0.2897960729744993}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4056319733717756, "res": {"No": 0.594327776216117, "Yes": 0.4056319733717756}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21956890011094962, "res": {"No": 0.7804120886716023, "Yes": 0.21956890011094962}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27876236456714726, "res": {"No": 0.7212130232969102, "Yes": 0.27876236456714726}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4464598072833111, "res": {"No": 0.5535084834673243, "Yes": 0.4464598072833111}, "ground_truth": 1}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3891235914557988, "res": {"No": 0.6108523268127555, "Yes": 0.3891235914557988}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2839297586809201, "res": {"No": 0.7160547130977305, "Yes": 0.2839297586809201}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.350142487570536, "res": {"No": 0.6498383495640927, "Yes": 0.350142487570536}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5370285071187832, "res": {"Yes": 0.5370285071187832, "No": 0.46294220958372545}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4144505963710193, "res": {"No": 0.5855189681326693, "Yes": 0.4144505963710193}, "ground_truth": 1}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3855864333830235, "res": {"No": 0.6143991749526997, "Yes": 0.3855864333830235}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5516904328898132, "res": {"Yes": 0.5516904328898132, "No": 0.4482902127408782}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33200362216155427, "res": {"No": 0.6679784943837719, "Yes": 0.33200362216155427}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1054642337640065, "res": {"No": 0.8945091561787047, "Yes": 0.1054642337640065}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5168738386887158, "res": {"Yes": 0.5168738386887158, "No": 0.48309709050122135}, "ground_truth": 1}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6610086519718512, "res": {"Yes": 0.6610086519718512, "No": 0.33895419456784887}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.49257196855505775, "res": {"No": 0.5073962411018466, "Yes": 0.49257196855505775}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4962530008232636, "res": {"No": 0.5037199248673928, "Yes": 0.4962530008232636}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37895217848525903, "res": {"No": 0.6210190880005697, "Yes": 0.37895217848525903}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6139580350468378, "res": {"Yes": 0.6139580350468378, "No": 0.38601795309117176}, "ground_truth": 1}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44567504106637346, "res": {"No": 0.5542888157558354, "Yes": 0.44567504106637346}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.20592567011292257, "res": {"No": 0.7940554054546022, "Yes": 0.20592567011292257}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24929266780906545, "res": {"No": 0.7506887954380851, "Yes": 0.24929266780906545}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38790593667502693, "res": {"No": 0.612073228020174, "Yes": 0.38790593667502693}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.36310424849886147, "res": {"No": 0.6368741179320143, "Yes": 0.36310424849886147}, "ground_truth": 1}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5015001361974621, "res": {"Yes": 0.5015001361974621, "No": 0.49847327592210255}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.320864937450666, "res": {"No": 0.6791156513046059, "Yes": 0.320864937450666}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.26130324182695025, "res": {"No": 0.738666873333125, "Yes": 0.26130324182695025}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3487871947222845, "res": {"No": 0.6511978147677934, "Yes": 0.3487871947222845}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3257198574594892, "res": {"No": 0.6742593835771246, "Yes": 0.3257198574594892}, "ground_truth": 1}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4264240948947944, "res": {"No": 0.5735482577039215, "Yes": 0.4264240948947944}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3516223671023777, "res": {"No": 0.6483529856503539, "Yes": 0.3516223671023777}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38726975013169884, "res": {"No": 0.6126916056083035, "Yes": 0.38726975013169884}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5346937424571941, "res": {"Yes": 0.5346937424571941, "No": 0.4652753351619486}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5512743394491875, "res": {"Yes": 0.5512743394491875, "No": 0.44869006858420085}, "ground_truth": 1}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38256341446734066, "res": {"No": 0.6174128216258842, "Yes": 0.38256341446734066}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4259580550007776, "res": {"No": 0.5740174112123785, "Yes": 0.4259580550007776}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3565774898195357, "res": {"No": 0.6434064753242019, "Yes": 0.3565774898195357}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5185483530600179, "res": {"Yes": 0.5185483530600179, "No": 0.48142828482991595}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5308591007169573, "res": {"Yes": 0.5308591007169573, "No": 0.46910025971621955}, "ground_truth": 1}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5674684987374152, "res": {"Yes": 0.5674684987374152, "No": 0.4325017700575349}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40477618267709564, "res": {"No": 0.5952032411485542, "Yes": 0.40477618267709564}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4005416191080609, "res": {"No": 0.5994234154786341, "Yes": 0.4005416191080609}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33295957259177944, "res": {"No": 0.6670152298292356, "Yes": 0.33295957259177944}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49552367318308393, "res": {"No": 0.5044505509241437, "Yes": 0.49552367318308393}, "ground_truth": 1}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5082862021445086, "res": {"Yes": 0.5082862021445086, "No": 0.49168849659880254}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39995335546951377, "res": {"No": 0.6000267448556494, "Yes": 0.39995335546951377}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4131290149236712, "res": {"No": 0.5868477866618429, "Yes": 0.4131290149236712}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46244270998727494, "res": {"No": 0.5375173179809039, "Yes": 0.46244270998727494}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46877775161717417, "res": {"No": 0.531198853044776, "Yes": 0.46877775161717417}, "ground_truth": 1}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5582897645933534, "res": {"Yes": 0.5582897645933534, "No": 0.4416713612681665}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40654950658206596, "res": {"No": 0.5934313152120838, "Yes": 0.40654950658206596}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3652821537705236, "res": {"No": 0.634691655444321, "Yes": 0.3652821537705236}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4891492604442548, "res": {"No": 0.5108300639286679, "Yes": 0.4891492604442548}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32798414833919554, "res": {"No": 0.6719972975298737, "Yes": 0.32798414833919554}, "ground_truth": 1}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34818170314458946, "res": {"No": 0.6517980267297679, "Yes": 0.34818170314458946}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32011889670297256, "res": {"No": 0.6798680797500933, "Yes": 0.32011889670297256}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40993911102810004, "res": {"No": 0.5900410312552824, "Yes": 0.40993911102810004}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5471858072791628, "res": {"Yes": 0.5471858072791628, "No": 0.4527953280758869}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37680585411575274, "res": {"No": 0.6231680209545516, "Yes": 0.37680585411575274}, "ground_truth": 1}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5144037068099651, "res": {"Yes": 0.5144037068099651, "No": 0.48557141805459947}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3925651571732993, "res": {"No": 0.6074129234049159, "Yes": 0.3925651571732993}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3841503755715352, "res": {"No": 0.615830177089597, "Yes": 0.3841503755715352}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41238157720004603, "res": {"No": 0.5875955135045149, "Yes": 0.41238157720004603}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6506703108508423, "res": {"Yes": 0.6506703108508423, "No": 0.34929854243329533}, "ground_truth": 1}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4243432242736692, "res": {"No": 0.5756231994320243, "Yes": 0.4243432242736692}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27023400774377276, "res": {"No": 0.7297515100352268, "Yes": 0.27023400774377276}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35096137651140114, "res": {"No": 0.6490179660471219, "Yes": 0.35096137651140114}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4309575868935807, "res": {"No": 0.5690246239257483, "Yes": 0.4309575868935807}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3946842897975376, "res": {"No": 0.6052982906863585, "Yes": 0.3946842897975376}, "ground_truth": 1}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4964960754297266, "res": {"No": 0.5034862731777616, "Yes": 0.4964960754297266}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5064454099881436, "res": {"Yes": 0.5064454099881436, "No": 0.49353364803064464}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17409990114743976, "res": {"No": 0.8258828364612036, "Yes": 0.17409990114743976}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36179888060010856, "res": {"No": 0.6381858864511278, "Yes": 0.36179888060010856}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4437774859241324, "res": {"No": 0.5562056383293583, "Yes": 0.4437774859241324}, "ground_truth": 1}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46575652031297, "res": {"No": 0.5342206623508028, "Yes": 0.46575652031297}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4599124976930436, "res": {"No": 0.5400602886540938, "Yes": 0.4599124976930436}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4025223321742289, "res": {"No": 0.5974593266773719, "Yes": 0.4025223321742289}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33700102738980037, "res": {"No": 0.662979890716901, "Yes": 0.33700102738980037}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5297984647182046, "res": {"Yes": 0.5297984647182046, "No": 0.4701836386770393}, "ground_truth": 1}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4851464038442067, "res": {"No": 0.5148296189743631, "Yes": 0.4851464038442067}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4573038817243544, "res": {"No": 0.5426755779167911, "Yes": 0.4573038817243544}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.47715335538731857, "res": {"No": 0.5228204404325059, "Yes": 0.47715335538731857}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32636489949066955, "res": {"No": 0.6736041871470017, "Yes": 0.32636489949066955}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5860111278970587, "res": {"Yes": 0.5860111278970587, "No": 0.41395722158521303}, "ground_truth": 1}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6043512199884776, "res": {"Yes": 0.6043512199884776, "No": 0.3956098423472388}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5803871314976703, "res": {"Yes": 0.5803871314976703, "No": 0.4195796761004377}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.46995130313359945, "res": {"No": 0.5300318075454785, "Yes": 0.46995130313359945}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4150342217927709, "res": {"No": 0.5849416384852756, "Yes": 0.4150342217927709}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5364702341487158, "res": {"Yes": 0.5364702341487158, "No": 0.46350562876715906}, "ground_truth": 1}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5112904014021167, "res": {"Yes": 0.5112904014021167, "No": 0.48868789246382033}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4825244432021132, "res": {"No": 0.5174480734689096, "Yes": 0.4825244432021132}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21699256254830246, "res": {"No": 0.7829942238235199, "Yes": 0.21699256254830246}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3046948484777459, "res": {"No": 0.6952831543902623, "Yes": 0.3046948484777459}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37798791302429513, "res": {"No": 0.6219961350551275, "Yes": 0.37798791302429513}, "ground_truth": 1}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34893580971998317, "res": {"No": 0.6510455109003342, "Yes": 0.34893580971998317}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2722177449601505, "res": {"No": 0.7277620198776353, "Yes": 0.2722177449601505}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40266276853214455, "res": {"No": 0.5973087679767211, "Yes": 0.40266276853214455}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27395703266009064, "res": {"No": 0.7260056876205083, "Yes": 0.27395703266009064}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45396349121513896, "res": {"No": 0.5460086286357088, "Yes": 0.45396349121513896}, "ground_truth": 1}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4964174863214781, "res": {"No": 0.503552551437908, "Yes": 0.4964174863214781}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5728690628418293, "res": {"Yes": 0.5728690628418293, "No": 0.42709953067489514}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5048195703421381, "res": {"Yes": 0.5048195703421381, "No": 0.4951565320692262}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5221498308529704, "res": {"Yes": 0.5221498308529704, "No": 0.4778246284011253}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5415167556452309, "res": {"Yes": 0.5415167556452309, "No": 0.45845602612931596}, "ground_truth": 1}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41553237481351685, "res": {"No": 0.5844452630100755, "Yes": 0.41553237481351685}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4824030070831358, "res": {"No": 0.5175724829391908, "Yes": 0.4824030070831358}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2180212377645188, "res": {"No": 0.7819623151056522, "Yes": 0.2180212377645188}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33393841140488983, "res": {"No": 0.6660478661022146, "Yes": 0.33393841140488983}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3740139435662017, "res": {"No": 0.6259613819253838, "Yes": 0.3740139435662017}, "ground_truth": 1}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45029367408351173, "res": {"No": 0.5496757280461814, "Yes": 0.45029367408351173}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4313560188667685, "res": {"No": 0.5686259172451607, "Yes": 0.4313560188667685}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42367501148273723, "res": {"No": 0.5762952285666757, "Yes": 0.42367501148273723}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37180660831845114, "res": {"No": 0.6281723923961582, "Yes": 0.37180660831845114}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34988519240834604, "res": {"No": 0.6500907047882439, "Yes": 0.34988519240834604}, "ground_truth": 1}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4214307791737222, "res": {"No": 0.5785413482311628, "Yes": 0.4214307791737222}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.14034149170396068, "res": {"No": 0.8596417811975297, "Yes": 0.14034149170396068}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39374551869465463, "res": {"No": 0.6062347207103765, "Yes": 0.39374551869465463}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5123910889083643, "res": {"Yes": 0.5123910889083643, "No": 0.48759155551158373}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44582096113070996, "res": {"No": 0.5541626185605838, "Yes": 0.44582096113070996}, "ground_truth": 1}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4946074308741988, "res": {"No": 0.5053689865893668, "Yes": 0.4946074308741988}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4748766466197355, "res": {"No": 0.5251064368929064, "Yes": 0.4748766466197355}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4814342545776602, "res": {"No": 0.5185300486262301, "Yes": 0.4814342545776602}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.22444362879755697, "res": {"No": 0.7755265972875716, "Yes": 0.22444362879755697}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2604537960319625, "res": {"No": 0.7395252889891326, "Yes": 0.2604537960319625}, "ground_truth": 1}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.30190109960311745, "res": {"No": 0.6980757903158342, "Yes": 0.30190109960311745}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40078610418885274, "res": {"No": 0.5991924422057172, "Yes": 0.40078610418885274}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4170401620293659, "res": {"No": 0.582941150654642, "Yes": 0.4170401620293659}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40677638383331266, "res": {"No": 0.5932033033729289, "Yes": 0.40677638383331266}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4591653473097044, "res": {"No": 0.5408185250175078, "Yes": 0.4591653473097044}, "ground_truth": 1}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45430840745207773, "res": {"No": 0.5456663121357506, "Yes": 0.45430840745207773}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4022094524721239, "res": {"No": 0.5977697097227125, "Yes": 0.4022094524721239}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3657660344956724, "res": {"No": 0.6342052745858919, "Yes": 0.3657660344956724}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3632307397357838, "res": {"No": 0.6367424259818429, "Yes": 0.3632307397357838}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3452764080405859, "res": {"No": 0.6546935600980973, "Yes": 0.3452764080405859}, "ground_truth": 1}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3506816314589462, "res": {"No": 0.6492964804579139, "Yes": 0.3506816314589462}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3734733234945644, "res": {"No": 0.6264876609822388, "Yes": 0.3734733234945644}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.48969171448020643, "res": {"No": 0.5102741238085859, "Yes": 0.48969171448020643}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4766971478034386, "res": {"No": 0.5232758584274428, "Yes": 0.4766971478034386}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.486261103183325, "res": {"No": 0.5137126276763326, "Yes": 0.486261103183325}, "ground_truth": 1}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4496805037715328, "res": {"No": 0.5502732257588648, "Yes": 0.4496805037715328}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38624271955359835, "res": {"No": 0.6137322628036083, "Yes": 0.38624271955359835}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3182106481297216, "res": {"No": 0.681773423461958, "Yes": 0.3182106481297216}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.45962384604886075, "res": {"No": 0.5403527564582619, "Yes": 0.45962384604886075}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3792189371904462, "res": {"No": 0.6207537397779046, "Yes": 0.3792189371904462}, "ground_truth": 1}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.28186726791546507, "res": {"No": 0.7180989040839141, "Yes": 0.28186726791546507}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.396018864989491, "res": {"No": 0.6039591595333929, "Yes": 0.396018864989491}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2666531043401782, "res": {"No": 0.7333348940911432, "Yes": 0.2666531043401782}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2731594741372533, "res": {"No": 0.7268264577895773, "Yes": 0.2731594741372533}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3599001309666089, "res": {"No": 0.6400793666029205, "Yes": 0.3599001309666089}, "ground_truth": 1}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3312995261340927, "res": {"No": 0.6686871275968758, "Yes": 0.3312995261340927}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3935698686346557, "res": {"No": 0.6064071219999931, "Yes": 0.3935698686346557}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4139050828173507, "res": {"No": 0.5860736937847806, "Yes": 0.4139050828173507}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4128619266320732, "res": {"No": 0.5871175345387746, "Yes": 0.4128619266320732}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4574300967178988, "res": {"No": 0.5425469247896979, "Yes": 0.4574300967178988}, "ground_truth": 1}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5115342172159617, "res": {"Yes": 0.5115342172159617, "No": 0.488440918276521}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4027414444424387, "res": {"No": 0.597237513291128, "Yes": 0.4027414444424387}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39800536256781105, "res": {"No": 0.6019745562090478, "Yes": 0.39800536256781105}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46467838864908734, "res": {"No": 0.5353028432978248, "Yes": 0.46467838864908734}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.36842014267364426, "res": {"No": 0.6315636692659142, "Yes": 0.36842014267364426}, "ground_truth": 1}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.524970892911928, "res": {"Yes": 0.524970892911928, "No": 0.47501221950723294}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4396610455347792, "res": {"No": 0.5603147497615648, "Yes": 0.4396610455347792}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24916899947195045, "res": {"No": 0.7508117533177479, "Yes": 0.24916899947195045}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36449195467472356, "res": {"No": 0.6354822369763493, "Yes": 0.36449195467472356}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6025040208678564, "res": {"Yes": 0.6025040208678564, "No": 0.3974712398833641}, "ground_truth": 1}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3854605869447182, "res": {"No": 0.614506833263274, "Yes": 0.3854605869447182}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35326842508823025, "res": {"No": 0.6467160387013692, "Yes": 0.35326842508823025}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2700799912425332, "res": {"No": 0.7298964749743814, "Yes": 0.2700799912425332}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3848272294847077, "res": {"No": 0.6151363244300012, "Yes": 0.3848272294847077}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4755900430213459, "res": {"No": 0.5243938263236965, "Yes": 0.4755900430213459}, "ground_truth": 1}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4156866282782449, "res": {"No": 0.5842820794559441, "Yes": 0.4156866282782449}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3334234417331277, "res": {"No": 0.666552241893005, "Yes": 0.3334234417331277}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16311806748247126, "res": {"No": 0.8368510511030809, "Yes": 0.16311806748247126}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35397343890682736, "res": {"No": 0.6460000356589007, "Yes": 0.35397343890682736}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39506562051209565, "res": {"No": 0.604922365906995, "Yes": 0.39506562051209565}, "ground_truth": 1}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3979770375341154, "res": {"No": 0.6020029098783689, "Yes": 0.3979770375341154}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2811315696032958, "res": {"No": 0.7188510539164876, "Yes": 0.2811315696032958}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.13258223295355548, "res": {"No": 0.8674021837523965, "Yes": 0.13258223295355548}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4511868877763169, "res": {"No": 0.5487674585354915, "Yes": 0.4511868877763169}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5670169622029493, "res": {"Yes": 0.5670169622029493, "No": 0.4329413978401492}, "ground_truth": 1}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5250116637345821, "res": {"Yes": 0.5250116637345821, "No": 0.47496393696912165}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3761283264762185, "res": {"No": 0.6238428408165269, "Yes": 0.3761283264762185}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4424023574559041, "res": {"No": 0.5575789340111188, "Yes": 0.4424023574559041}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3268352034835424, "res": {"No": 0.6731468978336708, "Yes": 0.3268352034835424}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3678283094828542, "res": {"No": 0.6321506771747599, "Yes": 0.3678283094828542}, "ground_truth": 1}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43756119879814037, "res": {"No": 0.5624197622315529, "Yes": 0.43756119879814037}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35086701568463896, "res": {"No": 0.6491133527362545, "Yes": 0.35086701568463896}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44018178045053646, "res": {"No": 0.5597908931921343, "Yes": 0.44018178045053646}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39122788605121944, "res": {"No": 0.6087547875851503, "Yes": 0.39122788605121944}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45375975273645536, "res": {"No": 0.546214490846831, "Yes": 0.45375975273645536}, "ground_truth": 1}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5009575059287771, "res": {"Yes": 0.5009575059287771, "No": 0.4990177862890571}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46810483581446666, "res": {"No": 0.5318830205517098, "Yes": 0.46810483581446666}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.15877698045946312, "res": {"No": 0.8412024101706547, "Yes": 0.15877698045946312}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4039685300382336, "res": {"No": 0.5960050731239962, "Yes": 0.4039685300382336}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38411599565144594, "res": {"No": 0.6158455607195614, "Yes": 0.38411599565144594}, "ground_truth": 1}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3095853058864427, "res": {"No": 0.69037948107352, "Yes": 0.3095853058864427}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2026791484163181, "res": {"No": 0.7972948012013203, "Yes": 0.2026791484163181}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4372675625354258, "res": {"No": 0.5627148850427647, "Yes": 0.4372675625354258}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3126936567942231, "res": {"No": 0.6872850863391585, "Yes": 0.3126936567942231}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4828849270442093, "res": {"No": 0.5170875067977191, "Yes": 0.4828849270442093}, "ground_truth": 1}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41410005732412186, "res": {"No": 0.5858768881254104, "Yes": 0.41410005732412186}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5275556086217373, "res": {"Yes": 0.5275556086217373, "No": 0.4723974701436526}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3061755649162938, "res": {"No": 0.6937937866647952, "Yes": 0.3061755649162938}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3591760933610213, "res": {"No": 0.6407942413023705, "Yes": 0.3591760933610213}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4846370590695669, "res": {"No": 0.515333889122117, "Yes": 0.4846370590695669}, "ground_truth": 1}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2596527545614394, "res": {"No": 0.7403155239081063, "Yes": 0.2596527545614394}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.452804234647586, "res": {"No": 0.5471276937607447, "Yes": 0.452804234647586}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.32238206485481713, "res": {"No": 0.6775981065472181, "Yes": 0.32238206485481713}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2677618802384923, "res": {"No": 0.7322133102693107, "Yes": 0.2677618802384923}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3744886438434939, "res": {"No": 0.625483955552298, "Yes": 0.3744886438434939}, "ground_truth": 1}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29827920547457387, "res": {"No": 0.7016989872538872, "Yes": 0.29827920547457387}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.20756973987412555, "res": {"No": 0.7924171827128897, "Yes": 0.20756973987412555}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11716934470541632, "res": {"No": 0.8828181193024329, "Yes": 0.11716934470541632}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35512618569609405, "res": {"No": 0.6448522875267219, "Yes": 0.35512618569609405}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3876934612252875, "res": {"No": 0.6122854093699096, "Yes": 0.3876934612252875}, "ground_truth": 1}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3633162179849653, "res": {"No": 0.6366676258754829, "Yes": 0.3633162179849653}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2029438985414714, "res": {"No": 0.7970384483603326, "Yes": 0.2029438985414714}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3938790880123026, "res": {"No": 0.6060969028520037, "Yes": 0.3938790880123026}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36222851840825165, "res": {"No": 0.6377212967685708, "Yes": 0.36222851840825165}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3118903485187291, "res": {"No": 0.6880686252638909, "Yes": 0.3118903485187291}, "ground_truth": 1}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3098372250158711, "res": {"No": 0.6901272348891865, "Yes": 0.3098372250158711}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.363965644151151, "res": {"No": 0.6359968045163491, "Yes": 0.363965644151151}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4073742430264229, "res": {"No": 0.5925892407588411, "Yes": 0.4073742430264229}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34041166988913935, "res": {"No": 0.6595535316934037, "Yes": 0.34041166988913935}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46172635896032915, "res": {"No": 0.5382480045697468, "Yes": 0.46172635896032915}, "ground_truth": 1}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4668815003902566, "res": {"No": 0.5330865954508175, "Yes": 0.4668815003902566}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4868193545152594, "res": {"No": 0.5131598725315838, "Yes": 0.4868193545152594}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3057922589199594, "res": {"No": 0.6941911667460993, "Yes": 0.3057922589199594}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23704414248011302, "res": {"No": 0.7629289586207945, "Yes": 0.23704414248011302}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.26822609931150576, "res": {"No": 0.7317542466576938, "Yes": 0.26822609931150576}, "ground_truth": 1}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.22044913301113692, "res": {"No": 0.7795293646956041, "Yes": 0.22044913301113692}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37282327693109096, "res": {"No": 0.6271471982208164, "Yes": 0.37282327693109096}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44802381029479615, "res": {"No": 0.5519518029574314, "Yes": 0.44802381029479615}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5025389347303688, "res": {"Yes": 0.5025389347303688, "No": 0.4974236507448868}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6167394682033678, "res": {"Yes": 0.6167394682033678, "No": 0.3832308614443613}, "ground_truth": 1}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4326651395231293, "res": {"No": 0.5673083306628932, "Yes": 0.4326651395231293}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4655993400426893, "res": {"No": 0.5343786547586862, "Yes": 0.4655993400426893}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.18507380268947227, "res": {"No": 0.8149004096144471, "Yes": 0.18507380268947227}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2919446060553725, "res": {"No": 0.708025378919054, "Yes": 0.2919446060553725}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4040266652924373, "res": {"No": 0.5959430739411825, "Yes": 0.4040266652924373}, "ground_truth": 1}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3107031886009417, "res": {"No": 0.6892778387343573, "Yes": 0.3107031886009417}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4642199216362906, "res": {"No": 0.5357466219078425, "Yes": 0.4642199216362906}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5162105985711162, "res": {"Yes": 0.5162105985711162, "No": 0.4837644956108797}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.6053956304774104, "res": {"Yes": 0.6053956304774104, "No": 0.3945881958728821}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6311023037665602, "res": {"Yes": 0.6311023037665602, "No": 0.36887708257048735}, "ground_truth": 1}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4415538790635166, "res": {"No": 0.5584223909118785, "Yes": 0.4415538790635166}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5105795632226313, "res": {"Yes": 0.5105795632226313, "No": 0.4893973513614303}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.26884687483222, "res": {"No": 0.7311333752449228, "Yes": 0.26884687483222}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37244410727674315, "res": {"No": 0.6275319016409648, "Yes": 0.37244410727674315}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3789013606162708, "res": {"No": 0.6210779385576439, "Yes": 0.3789013606162708}, "ground_truth": 1}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3501856978195198, "res": {"No": 0.6497995163846697, "Yes": 0.3501856978195198}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3596031560017771, "res": {"No": 0.6403812031651384, "Yes": 0.3596031560017771}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3871947432457497, "res": {"No": 0.6127902752817301, "Yes": 0.3871947432457497}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.48191168878810325, "res": {"No": 0.5180700210944891, "Yes": 0.48191168878810325}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45062622612657305, "res": {"No": 0.5493529762958556, "Yes": 0.45062622612657305}, "ground_truth": 1}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3995477725795521, "res": {"No": 0.6004389547490505, "Yes": 0.3995477725795521}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5837740509948809, "res": {"Yes": 0.5837740509948809, "No": 0.41621099407761114}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4146333679279882, "res": {"No": 0.585341612027646, "Yes": 0.4146333679279882}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3635257301707185, "res": {"No": 0.636448134355015, "Yes": 0.3635257301707185}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4880110646123813, "res": {"No": 0.5119592228425176, "Yes": 0.4880110646123813}, "ground_truth": 1}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.430522022840014, "res": {"No": 0.5694598494414479, "Yes": 0.430522022840014}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4352060421152065, "res": {"No": 0.5647614778889511, "Yes": 0.4352060421152065}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43095472965425136, "res": {"No": 0.5690208513049979, "Yes": 0.43095472965425136}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41775057294728923, "res": {"No": 0.5822148158223449, "Yes": 0.41775057294728923}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5276323735473047, "res": {"Yes": 0.5276323735473047, "No": 0.4723319531582516}, "ground_truth": 1}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47011741327405987, "res": {"No": 0.5298481303540792, "Yes": 0.47011741327405987}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4202740418005758, "res": {"No": 0.5797022322182477, "Yes": 0.4202740418005758}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2816694383628527, "res": {"No": 0.7183029736973174, "Yes": 0.2816694383628527}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3622360890633995, "res": {"No": 0.6377456454324946, "Yes": 0.3622360890633995}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5127481448890574, "res": {"Yes": 0.5127481448890574, "No": 0.4872096771482246}, "ground_truth": 1}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40042876238205577, "res": {"No": 0.59952776019919, "Yes": 0.40042876238205577}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4295299232584147, "res": {"No": 0.5704114993126338, "Yes": 0.4295299232584147}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3563753676898468, "res": {"No": 0.643584182993173, "Yes": 0.3563753676898468}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3454486403278271, "res": {"No": 0.6545156516594066, "Yes": 0.3454486403278271}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48206240613661666, "res": {"No": 0.517904011437067, "Yes": 0.48206240613661666}, "ground_truth": 1}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2893675498881418, "res": {"No": 0.7106018442902298, "Yes": 0.2893675498881418}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37502244555110753, "res": {"No": 0.6249495203172531, "Yes": 0.37502244555110753}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23182942843203128, "res": {"No": 0.7681599801810934, "Yes": 0.23182942843203128}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.44081931678326364, "res": {"No": 0.5591525192604397, "Yes": 0.44081931678326364}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3765119660160835, "res": {"No": 0.623467543842764, "Yes": 0.3765119660160835}, "ground_truth": 1}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4882274185814846, "res": {"No": 0.5117467526703509, "Yes": 0.4882274185814846}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5428248342225196, "res": {"Yes": 0.5428248342225196, "No": 0.4571522923257706}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.48336515329459767, "res": {"No": 0.5166056042727155, "Yes": 0.48336515329459767}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3358975756047799, "res": {"No": 0.6640777387021138, "Yes": 0.3358975756047799}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37052024254110427, "res": {"No": 0.6294556378640425, "Yes": 0.37052024254110427}, "ground_truth": 1}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4063993833770081, "res": {"No": 0.5935709897726668, "Yes": 0.4063993833770081}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3891686429017342, "res": {"No": 0.610813532813396, "Yes": 0.3891686429017342}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.461089920357878, "res": {"No": 0.5388898709537964, "Yes": 0.461089920357878}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3652817519603754, "res": {"No": 0.6346680326605282, "Yes": 0.3652817519603754}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6612380749991457, "res": {"Yes": 0.6612380749991457, "No": 0.33872859397587657}, "ground_truth": 1}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4664359999816389, "res": {"No": 0.5335417790820174, "Yes": 0.4664359999816389}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4160457287010576, "res": {"No": 0.5839274694582628, "Yes": 0.4160457287010576}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.03590573309494386, "res": {"No": 0.9640765642785556, "Yes": 0.03590573309494386}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2089015480423938, "res": {"No": 0.7910643197855807, "Yes": 0.2089015480423938}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2728031789604222, "res": {"No": 0.7271699353511397, "Yes": 0.2728031789604222}, "ground_truth": 1}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35302090600228264, "res": {"No": 0.6469560213341132, "Yes": 0.35302090600228264}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2205348161913887, "res": {"No": 0.7794427482051005, "Yes": 0.2205348161913887}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3401388712193472, "res": {"No": 0.6598362043430759, "Yes": 0.3401388712193472}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4347856621564195, "res": {"No": 0.5651616175380078, "Yes": 0.4347856621564195}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4351812360777847, "res": {"No": 0.5647648269344449, "Yes": 0.4351812360777847}, "ground_truth": 1}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5516005698378398, "res": {"Yes": 0.5516005698378398, "No": 0.4483617027979526}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2999286097933979, "res": {"No": 0.7000482134174176, "Yes": 0.2999286097933979}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38394226087678235, "res": {"No": 0.6160391637212607, "Yes": 0.38394226087678235}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3479062959421214, "res": {"No": 0.6520792339342153, "Yes": 0.3479062959421214}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2956112004690225, "res": {"No": 0.7043745732813695, "Yes": 0.2956112004690225}, "ground_truth": 1}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37052950566295634, "res": {"No": 0.6294533907114264, "Yes": 0.37052950566295634}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3066606553251145, "res": {"No": 0.6933280951885061, "Yes": 0.3066606553251145}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2405061302875637, "res": {"No": 0.7594746308044287, "Yes": 0.2405061302875637}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3231299224330886, "res": {"No": 0.6768538420486333, "Yes": 0.3231299224330886}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.390570819574792, "res": {"No": 0.6094073387013518, "Yes": 0.390570819574792}, "ground_truth": 1}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.434069421630341, "res": {"No": 0.5659033019420007, "Yes": 0.434069421630341}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2911228313532155, "res": {"No": 0.7088578473095877, "Yes": 0.2911228313532155}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.259971700011602, "res": {"No": 0.7400115165497165, "Yes": 0.259971700011602}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40667798432614816, "res": {"No": 0.5932826792850504, "Yes": 0.40667798432614816}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5940070446557517, "res": {"Yes": 0.5940070446557517, "No": 0.4059577982419316}, "ground_truth": 1}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24096600826084758, "res": {"No": 0.7590076671428839, "Yes": 0.24096600826084758}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.18890234321979893, "res": {"No": 0.8110780907863334, "Yes": 0.18890234321979893}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42867452113480764, "res": {"No": 0.571308520004238, "Yes": 0.42867452113480764}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.49793468259869855, "res": {"No": 0.5020477722367348, "Yes": 0.49793468259869855}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5706953781702233, "res": {"Yes": 0.5706953781702233, "No": 0.4292857490871111}, "ground_truth": 1}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4162772809563706, "res": {"No": 0.5836977975712002, "Yes": 0.4162772809563706}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4134591624900561, "res": {"No": 0.5865230562893465, "Yes": 0.4134591624900561}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.0566106584309784, "res": {"No": 0.9433589631170619, "Yes": 0.0566106584309784}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28090613629155065, "res": {"No": 0.7190627004404629, "Yes": 0.28090613629155065}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43208159554368236, "res": {"No": 0.5678814652383497, "Yes": 0.43208159554368236}, "ground_truth": 1}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3335017387489565, "res": {"No": 0.6664645827067457, "Yes": 0.3335017387489565}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4402756988494972, "res": {"No": 0.5596925073937271, "Yes": 0.4402756988494972}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4508423306722119, "res": {"No": 0.5491375623039974, "Yes": 0.4508423306722119}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4594504856550991, "res": {"No": 0.5405282104596758, "Yes": 0.4594504856550991}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4678424897859716, "res": {"No": 0.5321408654549585, "Yes": 0.4678424897859716}, "ground_truth": 1}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4478321687866743, "res": {"No": 0.5521462622271512, "Yes": 0.4478321687866743}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4804479697469591, "res": {"No": 0.5195337002490927, "Yes": 0.4804479697469591}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.32145529380898336, "res": {"No": 0.6785210336273606, "Yes": 0.32145529380898336}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.24380511057330823, "res": {"No": 0.7561699246527248, "Yes": 0.24380511057330823}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.20102777427428967, "res": {"No": 0.7989445618828326, "Yes": 0.20102777427428967}, "ground_truth": 1}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5159245216871601, "res": {"Yes": 0.5159245216871601, "No": 0.4840417333756518}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.282330587089846, "res": {"No": 0.7176472470745814, "Yes": 0.282330587089846}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5911482777892614, "res": {"Yes": 0.5911482777892614, "No": 0.40881624607268024}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5426788339800267, "res": {"Yes": 0.5426788339800267, "No": 0.4572961670729429}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42885539134126494, "res": {"No": 0.5711180203803946, "Yes": 0.42885539134126494}, "ground_truth": 1}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46434414957981707, "res": {"No": 0.5356406349238062, "Yes": 0.46434414957981707}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.48162607693195364, "res": {"No": 0.5183489369756569, "Yes": 0.48162607693195364}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29969496647872795, "res": {"No": 0.7002891204482725, "Yes": 0.29969496647872795}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3082102496992562, "res": {"No": 0.6917742257271655, "Yes": 0.3082102496992562}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37293490201448193, "res": {"No": 0.6270478659719401, "Yes": 0.37293490201448193}, "ground_truth": 1}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5040755355587998, "res": {"Yes": 0.5040755355587998, "No": 0.49590101435806494}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4809101137692678, "res": {"No": 0.5190642695776673, "Yes": 0.4809101137692678}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2714184915411052, "res": {"No": 0.7285708525084662, "Yes": 0.2714184915411052}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3903673734660345, "res": {"No": 0.6096117803958989, "Yes": 0.3903673734660345}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.36926288615280817, "res": {"No": 0.6307197067264835, "Yes": 0.36926288615280817}, "ground_truth": 1}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3772784613168853, "res": {"No": 0.6227072153312955, "Yes": 0.3772784613168853}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3311952825768024, "res": {"No": 0.6687955706938155, "Yes": 0.3311952825768024}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11522152490967844, "res": {"No": 0.8847649389144898, "Yes": 0.11522152490967844}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38574683213327554, "res": {"No": 0.6142316388559671, "Yes": 0.38574683213327554}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4711113975902623, "res": {"No": 0.5288660726346227, "Yes": 0.4711113975902623}, "ground_truth": 1}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4671267709566745, "res": {"No": 0.532847064172527, "Yes": 0.4671267709566745}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30198784824987984, "res": {"No": 0.6979914329539708, "Yes": 0.30198784824987984}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33839517825737525, "res": {"No": 0.6615795339009471, "Yes": 0.33839517825737525}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3219843297482604, "res": {"No": 0.6780011465272702, "Yes": 0.3219843297482604}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38194450721454776, "res": {"No": 0.6180256361138167, "Yes": 0.38194450721454776}, "ground_truth": 1}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2650387068704758, "res": {"No": 0.734943965566502, "Yes": 0.2650387068704758}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38351382855936067, "res": {"No": 0.6164595320351219, "Yes": 0.38351382855936067}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28135963164834626, "res": {"No": 0.7186085401106802, "Yes": 0.28135963164834626}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35126582709644993, "res": {"No": 0.6487131160442353, "Yes": 0.35126582709644993}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3706524677512177, "res": {"No": 0.6293272420107404, "Yes": 0.3706524677512177}, "ground_truth": 1}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42142861723937036, "res": {"No": 0.5785461038605908, "Yes": 0.42142861723937036}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.24413129601252098, "res": {"No": 0.7558512737499848, "Yes": 0.24413129601252098}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5001552643820115, "res": {"Yes": 0.5001552643820115, "No": 0.49982530080689164}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5303191414386011, "res": {"Yes": 0.5303191414386011, "No": 0.4696593191421258}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5286791779521585, "res": {"Yes": 0.5286791779521585, "No": 0.47129625084813276}, "ground_truth": 1}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3917961617390116, "res": {"No": 0.6081849495102049, "Yes": 0.3917961617390116}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3149801367755912, "res": {"No": 0.6849997538931838, "Yes": 0.3149801367755912}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3840409467184152, "res": {"No": 0.6159390285340146, "Yes": 0.3840409467184152}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32680931916045924, "res": {"No": 0.6731808926103723, "Yes": 0.32680931916045924}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35727695987821695, "res": {"No": 0.6427067654845757, "Yes": 0.35727695987821695}, "ground_truth": 1}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39892304913611654, "res": {"No": 0.6010561026309984, "Yes": 0.39892304913611654}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.422809777357154, "res": {"No": 0.5771721692889484, "Yes": 0.422809777357154}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2622180944308153, "res": {"No": 0.7377620366068067, "Yes": 0.2622180944308153}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2765992143883748, "res": {"No": 0.7233769669230532, "Yes": 0.2765992143883748}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42753452227760386, "res": {"No": 0.572435141458708, "Yes": 0.42753452227760386}, "ground_truth": 1}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29334842870728034, "res": {"No": 0.7066290376573748, "Yes": 0.29334842870728034}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3176479134496398, "res": {"No": 0.6823342080776925, "Yes": 0.3176479134496398}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11232669881905717, "res": {"No": 0.8876578804401857, "Yes": 0.11232669881905717}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.20130681338281894, "res": {"No": 0.7986722560869307, "Yes": 0.20130681338281894}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5270462359704097, "res": {"Yes": 0.5270462359704097, "No": 0.4729244397877585}, "ground_truth": 1}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3790960636203652, "res": {"No": 0.6208817399790593, "Yes": 0.3790960636203652}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36918863485262127, "res": {"No": 0.6307516219510921, "Yes": 0.36918863485262127}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3154582296190971, "res": {"No": 0.6845076009038132, "Yes": 0.3154582296190971}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2657929385062581, "res": {"No": 0.7341787877408604, "Yes": 0.2657929385062581}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3430443293772769, "res": {"No": 0.656928410937458, "Yes": 0.3430443293772769}, "ground_truth": 1}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39072804424953145, "res": {"No": 0.6092539518988204, "Yes": 0.39072804424953145}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30801561476643713, "res": {"No": 0.6919705791136544, "Yes": 0.30801561476643713}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2624272967694686, "res": {"No": 0.7375526525709509, "Yes": 0.2624272967694686}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5614541861663632, "res": {"Yes": 0.5614541861663632, "No": 0.43851879765689916}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3628610864190243, "res": {"No": 0.6371192597347195, "Yes": 0.3628610864190243}, "ground_truth": 1}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4155339746162395, "res": {"No": 0.5844386120608265, "Yes": 0.4155339746162395}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.372277959786338, "res": {"No": 0.6276947798542415, "Yes": 0.372277959786338}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.07209488358278415, "res": {"No": 0.9278878451676954, "Yes": 0.07209488358278415}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2669795807761991, "res": {"No": 0.7329986198322903, "Yes": 0.2669795807761991}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.19706197614489201, "res": {"No": 0.8029175080608822, "Yes": 0.19706197614489201}, "ground_truth": 1}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2512794782808803, "res": {"No": 0.7487082430418308, "Yes": 0.2512794782808803}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.18408427682726744, "res": {"No": 0.8158947379085782, "Yes": 0.18408427682726744}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4237784558897183, "res": {"No": 0.57619962911845, "Yes": 0.4237784558897183}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3657592313107006, "res": {"No": 0.63421401399479, "Yes": 0.3657592313107006}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3625122544063123, "res": {"No": 0.6374641874987895, "Yes": 0.3625122544063123}, "ground_truth": 1}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4144628021208112, "res": {"No": 0.5855094242512716, "Yes": 0.4144628021208112}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3524181789947246, "res": {"No": 0.6475586178551302, "Yes": 0.3524181789947246}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3153203414256631, "res": {"No": 0.684660023488969, "Yes": 0.3153203414256631}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39087357846999987, "res": {"No": 0.6091090093498193, "Yes": 0.39087357846999987}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43260853331953264, "res": {"No": 0.5673726273593107, "Yes": 0.43260853331953264}, "ground_truth": 1}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3293083609013598, "res": {"No": 0.6706751885002746, "Yes": 0.3293083609013598}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27543943808971594, "res": {"No": 0.7245452516700364, "Yes": 0.27543943808971594}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3947183406799822, "res": {"No": 0.6052604607253893, "Yes": 0.3947183406799822}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41423378353822893, "res": {"No": 0.5857397503984976, "Yes": 0.41423378353822893}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34328547134283716, "res": {"No": 0.6566884459807898, "Yes": 0.34328547134283716}, "ground_truth": 1}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34899705332855846, "res": {"No": 0.6509834041211348, "Yes": 0.34899705332855846}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3449622114426984, "res": {"No": 0.6550178773861755, "Yes": 0.3449622114426984}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.34337387873367103, "res": {"No": 0.6566082560842613, "Yes": 0.34337387873367103}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23733142428822762, "res": {"No": 0.762636614493597, "Yes": 0.23733142428822762}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28349567830630307, "res": {"No": 0.7164862445600577, "Yes": 0.28349567830630307}, "ground_truth": 1}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2808896194963353, "res": {"No": 0.7190904352236809, "Yes": 0.2808896194963353}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4570783310684994, "res": {"No": 0.5429005852849157, "Yes": 0.4570783310684994}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.13099709913942786, "res": {"No": 0.8689835532154901, "Yes": 0.13099709913942786}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3991522723236429, "res": {"No": 0.6008144365155964, "Yes": 0.3991522723236429}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4012563014953179, "res": {"No": 0.598718015783188, "Yes": 0.4012563014953179}, "ground_truth": 1}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39702520962346943, "res": {"No": 0.6029412298138811, "Yes": 0.39702520962346943}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3189522735775298, "res": {"No": 0.681025519477158, "Yes": 0.3189522735775298}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30904666316710605, "res": {"No": 0.6909432402844861, "Yes": 0.30904666316710605}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3945558251851564, "res": {"No": 0.6054199073289402, "Yes": 0.3945558251851564}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4081285012995758, "res": {"No": 0.5918492037150311, "Yes": 0.4081285012995758}, "ground_truth": 1}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.516210970242881, "res": {"Yes": 0.516210970242881, "No": 0.48376299594326777}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.48848896368807726, "res": {"No": 0.5114947794481598, "Yes": 0.48848896368807726}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21967901956210462, "res": {"No": 0.7803032755965819, "Yes": 0.21967901956210462}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1166387182176324, "res": {"No": 0.8833373195341422, "Yes": 0.1166387182176324}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.21582055078338913, "res": {"No": 0.784165267864131, "Yes": 0.21582055078338913}, "ground_truth": 1}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.25300948521221844, "res": {"No": 0.7469669488566402, "Yes": 0.25300948521221844}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.0798048260131286, "res": {"No": 0.9201703988022787, "Yes": 0.0798048260131286}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43923547069143537, "res": {"No": 0.5607319753072034, "Yes": 0.43923547069143537}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4203913566561689, "res": {"No": 0.5795665977657585, "Yes": 0.4203913566561689}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44197895993255215, "res": {"No": 0.5579810766441404, "Yes": 0.44197895993255215}, "ground_truth": 1}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5378691521597864, "res": {"Yes": 0.5378691521597864, "No": 0.46211211693190296}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41520000296056625, "res": {"No": 0.5847780707308036, "Yes": 0.41520000296056625}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06376305063830887, "res": {"No": 0.9362285171950717, "Yes": 0.06376305063830887}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40922293421765615, "res": {"No": 0.5907360956307419, "Yes": 0.40922293421765615}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1520899738554057, "res": {"No": 0.8478966462841737, "Yes": 0.1520899738554057}, "ground_truth": 1}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3153219495635051, "res": {"No": 0.684659681159043, "Yes": 0.3153219495635051}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30568723731751324, "res": {"No": 0.6942877007392056, "Yes": 0.30568723731751324}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2803432874955929, "res": {"No": 0.7196244501967529, "Yes": 0.2803432874955929}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41148683174365336, "res": {"No": 0.5884919095234555, "Yes": 0.41148683174365336}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4287231527308171, "res": {"No": 0.571249107007687, "Yes": 0.4287231527308171}, "ground_truth": 1}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3293550272023288, "res": {"No": 0.6706244001913731, "Yes": 0.3293550272023288}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2650164445541291, "res": {"No": 0.7349636182308962, "Yes": 0.2650164445541291}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4725040550352407, "res": {"No": 0.5274703836222693, "Yes": 0.4725040550352407}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4392773889325005, "res": {"No": 0.5606945757317401, "Yes": 0.4392773889325005}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4937142107257586, "res": {"No": 0.5062674965865469, "Yes": 0.4937142107257586}, "ground_truth": 1}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45818602921972124, "res": {"No": 0.5417916830714483, "Yes": 0.45818602921972124}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4611675744387564, "res": {"No": 0.5388079389779542, "Yes": 0.4611675744387564}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3615193904340907, "res": {"No": 0.6384572241737744, "Yes": 0.3615193904340907}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3742139074546264, "res": {"No": 0.6257587409564607, "Yes": 0.3742139074546264}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.489334697829387, "res": {"No": 0.5106388449511593, "Yes": 0.489334697829387}, "ground_truth": 1}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.354252550754903, "res": {"No": 0.6457170721137983, "Yes": 0.354252550754903}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.379834426861593, "res": {"No": 0.6201469579985641, "Yes": 0.379834426861593}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2619996267081785, "res": {"No": 0.7379844684971041, "Yes": 0.2619996267081785}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.26561770585263206, "res": {"No": 0.734355672715767, "Yes": 0.26561770585263206}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41680716424628533, "res": {"No": 0.5831613341232011, "Yes": 0.41680716424628533}, "ground_truth": 1}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5103035674749714, "res": {"Yes": 0.5103035674749714, "No": 0.48967046232097317}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3008903699537568, "res": {"No": 0.6990881751480268, "Yes": 0.3008903699537568}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3641958159810325, "res": {"No": 0.6357875578370992, "Yes": 0.3641958159810325}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4662169757608419, "res": {"No": 0.5337573201321735, "Yes": 0.4662169757608419}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49290670115453167, "res": {"No": 0.5070742177549076, "Yes": 0.49290670115453167}, "ground_truth": 1}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40499650562469214, "res": {"No": 0.5949765416571658, "Yes": 0.40499650562469214}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30420173368848286, "res": {"No": 0.6957870417253629, "Yes": 0.30420173368848286}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40913752193715747, "res": {"No": 0.5908201042764137, "Yes": 0.40913752193715747}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.18165109359349824, "res": {"No": 0.8183320464410423, "Yes": 0.18165109359349824}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34544028057188453, "res": {"No": 0.6545160050979539, "Yes": 0.34544028057188453}, "ground_truth": 1}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4646992299421807, "res": {"No": 0.535251312173591, "Yes": 0.4646992299421807}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31561445165815616, "res": {"No": 0.6843608376747333, "Yes": 0.31561445165815616}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40219840795220096, "res": {"No": 0.5977795132263405, "Yes": 0.40219840795220096}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39345292098409307, "res": {"No": 0.6065276270669164, "Yes": 0.39345292098409307}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3383583967006125, "res": {"No": 0.6616212147245151, "Yes": 0.3383583967006125}, "ground_truth": 1}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3944661686163866, "res": {"No": 0.6055144207530551, "Yes": 0.3944661686163866}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3875656985169186, "res": {"No": 0.6124117675006848, "Yes": 0.3875656985169186}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22594659792467187, "res": {"No": 0.7740372886519158, "Yes": 0.22594659792467187}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3249711726905882, "res": {"No": 0.6750102183013276, "Yes": 0.3249711726905882}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3731528715513278, "res": {"No": 0.6268234418183117, "Yes": 0.3731528715513278}, "ground_truth": 1}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3058278552105616, "res": {"No": 0.6941514601469809, "Yes": 0.3058278552105616}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40893228458840225, "res": {"No": 0.591054470001421, "Yes": 0.40893228458840225}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5268789253524552, "res": {"Yes": 0.5268789253524552, "No": 0.4730891170960097}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2827514601207853, "res": {"No": 0.7172284786059312, "Yes": 0.2827514601207853}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4977213483624894, "res": {"No": 0.5022597564452032, "Yes": 0.4977213483624894}, "ground_truth": 1}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47205917051172136, "res": {"No": 0.5279214211134409, "Yes": 0.47205917051172136}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4399353036675728, "res": {"No": 0.5600432870145664, "Yes": 0.4399353036675728}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4619168880866276, "res": {"No": 0.5380574177890746, "Yes": 0.4619168880866276}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4654868648296879, "res": {"No": 0.5344666741713245, "Yes": 0.4654868648296879}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49512337749786633, "res": {"No": 0.5048358762775996, "Yes": 0.49512337749786633}, "ground_truth": 1}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5527499221974479, "res": {"Yes": 0.5527499221974479, "No": 0.4472046764580171}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36019609049696577, "res": {"No": 0.639758293735807, "Yes": 0.36019609049696577}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21944900432422912, "res": {"No": 0.7805366133540422, "Yes": 0.21944900432422912}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28980862141613084, "res": {"No": 0.7101744392776183, "Yes": 0.28980862141613084}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.433466857054153, "res": {"No": 0.5664590993347862, "Yes": 0.433466857054153}, "ground_truth": 1}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4781893764066814, "res": {"No": 0.5217685918619436, "Yes": 0.4781893764066814}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4160217651572411, "res": {"No": 0.583949542333783, "Yes": 0.4160217651572411}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5137665189575241, "res": {"Yes": 0.5137665189575241, "No": 0.4862147403975174}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3363303558872818, "res": {"No": 0.6636486109222259, "Yes": 0.3363303558872818}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5203251366570426, "res": {"Yes": 0.5203251366570426, "No": 0.47965508801901136}, "ground_truth": 1}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48811993250203806, "res": {"No": 0.5118507808873282, "Yes": 0.48811993250203806}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46678855353669524, "res": {"No": 0.5331919597659083, "Yes": 0.46678855353669524}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42210397881510653, "res": {"No": 0.5778673412848437, "Yes": 0.42210397881510653}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23083814154681356, "res": {"No": 0.7691295947060846, "Yes": 0.23083814154681356}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5673300419681508, "res": {"Yes": 0.5673300419681508, "No": 0.43263961303293624}, "ground_truth": 1}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4878177382916233, "res": {"No": 0.5121479657952467, "Yes": 0.4878177382916233}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5092481024744919, "res": {"Yes": 0.5092481024744919, "No": 0.4907227866184516}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5594219502877091, "res": {"Yes": 0.5594219502877091, "No": 0.4405474415879322}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.45218029316374814, "res": {"No": 0.5477991904360989, "Yes": 0.45218029316374814}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6207582030133388, "res": {"Yes": 0.6207582030133388, "No": 0.37922525882281977}, "ground_truth": 1}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5170471341813557, "res": {"Yes": 0.5170471341813557, "No": 0.48292273841429273}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4410765040542034, "res": {"No": 0.5589038914702018, "Yes": 0.4410765040542034}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43889806107046353, "res": {"No": 0.5610808310334098, "Yes": 0.43889806107046353}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3397863640127246, "res": {"No": 0.6601803326464729, "Yes": 0.3397863640127246}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4144370440580995, "res": {"No": 0.5855389053929642, "Yes": 0.4144370440580995}, "ground_truth": 1}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3417268312898472, "res": {"No": 0.658249822671238, "Yes": 0.3417268312898472}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2894541705579599, "res": {"No": 0.7105204850369647, "Yes": 0.2894541705579599}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21445593348881117, "res": {"No": 0.7855251923994895, "Yes": 0.21445593348881117}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.45663161830983984, "res": {"No": 0.5433422461646306, "Yes": 0.45663161830983984}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4733571980075933, "res": {"No": 0.5266217131766691, "Yes": 0.4733571980075933}, "ground_truth": 1}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47742387302083544, "res": {"No": 0.5225623876866261, "Yes": 0.47742387302083544}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4464307749460401, "res": {"No": 0.5535463779552159, "Yes": 0.4464307749460401}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3726086894368602, "res": {"No": 0.627359147065836, "Yes": 0.3726086894368602}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2805309279573723, "res": {"No": 0.7194444658483615, "Yes": 0.2805309279573723}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3873855107397454, "res": {"No": 0.6125895641927184, "Yes": 0.3873855107397454}, "ground_truth": 1}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45593022341640443, "res": {"No": 0.5440329848766112, "Yes": 0.45593022341640443}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3761217819002758, "res": {"No": 0.6238546066034556, "Yes": 0.3761217819002758}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37526992948203675, "res": {"No": 0.624707424280405, "Yes": 0.37526992948203675}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2436975918540312, "res": {"No": 0.7562647618696189, "Yes": 0.2436975918540312}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38441645339709396, "res": {"No": 0.6155659441139647, "Yes": 0.38441645339709396}, "ground_truth": 1}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34423046139728103, "res": {"No": 0.6557475476537878, "Yes": 0.34423046139728103}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35066921754892016, "res": {"No": 0.6493192971371239, "Yes": 0.35066921754892016}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.41071142501956964, "res": {"No": 0.5892636631959093, "Yes": 0.41071142501956964}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.20610947898074028, "res": {"No": 0.7938688084810814, "Yes": 0.20610947898074028}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3414544469286544, "res": {"No": 0.6585122204239025, "Yes": 0.3414544469286544}, "ground_truth": 1}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5229468737001685, "res": {"Yes": 0.5229468737001685, "No": 0.47702303397686957}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37536992972348854, "res": {"No": 0.6245998339395931, "Yes": 0.37536992972348854}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39716055873173095, "res": {"No": 0.6028155598129372, "Yes": 0.39716055873173095}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5089411947748634, "res": {"Yes": 0.5089411947748634, "No": 0.49103636047937144}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5080844017637545, "res": {"Yes": 0.5080844017637545, "No": 0.4918984432279201}, "ground_truth": 1}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3915525520471081, "res": {"No": 0.6084296411164453, "Yes": 0.3915525520471081}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4083051081330456, "res": {"No": 0.5916638655698961, "Yes": 0.4083051081330456}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.15323089155480785, "res": {"No": 0.8467497606104776, "Yes": 0.15323089155480785}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.09100541215527663, "res": {"No": 0.9089824744713827, "Yes": 0.09100541215527663}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.23258977487233104, "res": {"No": 0.7673848986138987, "Yes": 0.23258977487233104}, "ground_truth": 1}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5660733248300632, "res": {"Yes": 0.5660733248300632, "No": 0.4338943533393037}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.164608722506771, "res": {"No": 0.835373557434681, "Yes": 0.164608722506771}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3508250895813992, "res": {"No": 0.649145932553012, "Yes": 0.3508250895813992}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37302139564680814, "res": {"No": 0.6269492704472923, "Yes": 0.37302139564680814}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30039719400438936, "res": {"No": 0.6995789674282129, "Yes": 0.30039719400438936}, "ground_truth": 1}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4136341051199457, "res": {"No": 0.5863337865179815, "Yes": 0.4136341051199457}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30635999936912645, "res": {"No": 0.6936240360950275, "Yes": 0.30635999936912645}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44131602067523346, "res": {"No": 0.5586626367996592, "Yes": 0.44131602067523346}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.476217736365661, "res": {"No": 0.5237675814019397, "Yes": 0.476217736365661}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5522351649318455, "res": {"Yes": 0.5522351649318455, "No": 0.44774880810958556}, "ground_truth": 1}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4317019324751437, "res": {"No": 0.568286833027446, "Yes": 0.4317019324751437}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42660689943408064, "res": {"No": 0.5733740402082086, "Yes": 0.42660689943408064}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1857737209321224, "res": {"No": 0.8142122643163431, "Yes": 0.1857737209321224}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4998145446821546, "res": {"No": 0.5001521334198564, "Yes": 0.4998145446821546}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45830645633162953, "res": {"No": 0.5416675186474749, "Yes": 0.45830645633162953}, "ground_truth": 1}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3396652177969718, "res": {"No": 0.66031393365341, "Yes": 0.3396652177969718}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3894730918044896, "res": {"No": 0.6105025307911016, "Yes": 0.3894730918044896}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.45522297278026047, "res": {"No": 0.5447474424905014, "Yes": 0.45522297278026047}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.400924479434212, "res": {"No": 0.5990244282372449, "Yes": 0.400924479434212}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3038868046626276, "res": {"No": 0.6960656697566889, "Yes": 0.3038868046626276}, "ground_truth": 1}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.28257510650404716, "res": {"No": 0.7158166358507347, "Yes": 0.28257510650404716}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4190725012580086, "res": {"No": 0.5808931186898606, "Yes": 0.4190725012580086}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3167963945329801, "res": {"No": 0.6831789143875027, "Yes": 0.3167963945329801}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38312164088646855, "res": {"No": 0.6168542355932695, "Yes": 0.38312164088646855}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4397256804590233, "res": {"No": 0.5602421376753606, "Yes": 0.4397256804590233}, "ground_truth": 1}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31255828974991123, "res": {"No": 0.6874108366483631, "Yes": 0.31255828974991123}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3223085378918926, "res": {"No": 0.6776644669790778, "Yes": 0.3223085378918926}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11921210231695538, "res": {"No": 0.8807370722356944, "Yes": 0.11921210231695538}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2835527517305936, "res": {"No": 0.7164145278764572, "Yes": 0.2835527517305936}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.27769044302714185, "res": {"No": 0.7222869221006651, "Yes": 0.27769044302714185}, "ground_truth": 1}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42662200158562713, "res": {"No": 0.5733560537466863, "Yes": 0.42662200158562713}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.15305347635453911, "res": {"No": 0.84693322881959, "Yes": 0.15305347635453911}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4295173253305151, "res": {"No": 0.5704491762364301, "Yes": 0.4295173253305151}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2681108405049135, "res": {"No": 0.7318417038336114, "Yes": 0.2681108405049135}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.311618841459114, "res": {"No": 0.688338194536102, "Yes": 0.311618841459114}, "ground_truth": 1}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3564077280423708, "res": {"No": 0.6435763570570896, "Yes": 0.3564077280423708}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.11454544902837101, "res": {"No": 0.8854209919177434, "Yes": 0.11454544902837101}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35180145880458397, "res": {"No": 0.6481810263453512, "Yes": 0.35180145880458397}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2539305689299989, "res": {"No": 0.7460473865136195, "Yes": 0.2539305689299989}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2965032201688882, "res": {"No": 0.703473227007343, "Yes": 0.2965032201688882}, "ground_truth": 1}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3450596425278315, "res": {"No": 0.6549167831789471, "Yes": 0.3450596425278315}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2309859714403317, "res": {"No": 0.7689941475296934, "Yes": 0.2309859714403317}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5004984886058264, "res": {"Yes": 0.5004984886058264, "No": 0.49947426686919727}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.403960430550404, "res": {"No": 0.5960192582135385, "Yes": 0.403960430550404}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43830840297414564, "res": {"No": 0.561676341381815, "Yes": 0.43830840297414564}, "ground_truth": 1}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44445248283550465, "res": {"No": 0.5555269256833496, "Yes": 0.44445248283550465}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3926756209190288, "res": {"No": 0.6073092407948424, "Yes": 0.3926756209190288}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3129684162759491, "res": {"No": 0.6870051571633309, "Yes": 0.3129684162759491}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3255110077281834, "res": {"No": 0.674459641376478, "Yes": 0.3255110077281834}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49415087176040534, "res": {"No": 0.5058230168243479, "Yes": 0.49415087176040534}, "ground_truth": 1}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4223493093774901, "res": {"No": 0.5776200958642531, "Yes": 0.4223493093774901}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2919844592140152, "res": {"No": 0.7079937025722064, "Yes": 0.2919844592140152}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3575758398163537, "res": {"No": 0.6424021175784637, "Yes": 0.3575758398163537}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4890152812478171, "res": {"No": 0.510963918939658, "Yes": 0.4890152812478171}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5103471492605495, "res": {"Yes": 0.5103471492605495, "No": 0.4896291554631195}, "ground_truth": 1}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5108399230440418, "res": {"Yes": 0.5108399230440418, "No": 0.48913818623035893}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4193369355856207, "res": {"No": 0.5806435625377386, "Yes": 0.4193369355856207}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36725935174667884, "res": {"No": 0.632708448391502, "Yes": 0.36725935174667884}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4267341624524016, "res": {"No": 0.5732432111803929, "Yes": 0.4267341624524016}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.446196696786356, "res": {"No": 0.5537808489553738, "Yes": 0.446196696786356}, "ground_truth": 1}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4073768094922383, "res": {"No": 0.5925952437182549, "Yes": 0.4073768094922383}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41800435410186226, "res": {"No": 0.5819765640597782, "Yes": 0.41800435410186226}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24324541670625205, "res": {"No": 0.7567298715352228, "Yes": 0.24324541670625205}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4396836183122952, "res": {"No": 0.5602761790218114, "Yes": 0.4396836183122952}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44969522655223687, "res": {"No": 0.5502702432860638, "Yes": 0.44969522655223687}, "ground_truth": 1}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4187015516870876, "res": {"No": 0.5812662880625435, "Yes": 0.4187015516870876}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43832543596963414, "res": {"No": 0.5616446075650157, "Yes": 0.43832543596963414}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39491540562178673, "res": {"No": 0.6050523292885062, "Yes": 0.39491540562178673}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2631942832346382, "res": {"No": 0.7367765657421148, "Yes": 0.2631942832346382}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2863019373637457, "res": {"No": 0.7136769785540541, "Yes": 0.2863019373637457}, "ground_truth": 1}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.25303800098803564, "res": {"No": 0.7469300197235844, "Yes": 0.25303800098803564}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3188361641937162, "res": {"No": 0.6811425705808486, "Yes": 0.3188361641937162}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38738780794263533, "res": {"No": 0.6125920451854773, "Yes": 0.38738780794263533}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.505335709137272, "res": {"Yes": 0.505335709137272, "No": 0.4946314693794702}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4283673530550409, "res": {"No": 0.5716073925085476, "Yes": 0.4283673530550409}, "ground_truth": 1}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4437861396694817, "res": {"No": 0.5561910325610704, "Yes": 0.4437861396694817}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5248746535709514, "res": {"Yes": 0.5248746535709514, "No": 0.47510362540181045}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2991572572115812, "res": {"No": 0.700818963456025, "Yes": 0.2991572572115812}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.306703652162999, "res": {"No": 0.6932612962446255, "Yes": 0.306703652162999}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4193603520138844, "res": {"No": 0.5806117441423436, "Yes": 0.4193603520138844}, "ground_truth": 1}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.30048113670664367, "res": {"No": 0.699478284273268, "Yes": 0.30048113670664367}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1776937528568996, "res": {"No": 0.8222872848777549, "Yes": 0.1776937528568996}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.20563839843972612, "res": {"No": 0.7943282738841915, "Yes": 0.20563839843972612}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4860943004772402, "res": {"No": 0.5138636608381308, "Yes": 0.4860943004772402}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33132523597484964, "res": {"No": 0.6686395856322405, "Yes": 0.33132523597484964}, "ground_truth": 1}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4156440932419154, "res": {"No": 0.5842847087312175, "Yes": 0.4156440932419154}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30173923345168846, "res": {"No": 0.6982380000881357, "Yes": 0.30173923345168846}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31479460514198965, "res": {"No": 0.6851736151910538, "Yes": 0.31479460514198965}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46155496101805726, "res": {"No": 0.5384208583708625, "Yes": 0.46155496101805726}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28472792608310027, "res": {"No": 0.7152504842615479, "Yes": 0.28472792608310027}, "ground_truth": 1}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40153286235132224, "res": {"No": 0.5984385695123408, "Yes": 0.40153286235132224}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4515212057034522, "res": {"No": 0.5484507684474316, "Yes": 0.4515212057034522}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3197409635204811, "res": {"No": 0.6802437698356312, "Yes": 0.3197409635204811}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.6055026133369731, "res": {"Yes": 0.6055026133369731, "No": 0.39447577792929506}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5084589979896078, "res": {"Yes": 0.5084589979896078, "No": 0.49152366108211165}, "ground_truth": 1}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44629014015784263, "res": {"No": 0.5536929432233043, "Yes": 0.44629014015784263}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.450148995458828, "res": {"No": 0.5498346182852237, "Yes": 0.450148995458828}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35542547003578967, "res": {"No": 0.6445493426192934, "Yes": 0.35542547003578967}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23699026846954055, "res": {"No": 0.7629883625820788, "Yes": 0.23699026846954055}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2978351599658873, "res": {"No": 0.7021419692530102, "Yes": 0.2978351599658873}, "ground_truth": 1}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.17874604920534515, "res": {"No": 0.8212271031984911, "Yes": 0.17874604920534515}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.059878306583456394, "res": {"No": 0.9401086549742542, "Yes": 0.059878306583456394}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2921539177524195, "res": {"No": 0.7078251139474261, "Yes": 0.2921539177524195}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41348774590993354, "res": {"No": 0.5864942587142691, "Yes": 0.41348774590993354}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3586843948964838, "res": {"No": 0.6412952626236581, "Yes": 0.3586843948964838}, "ground_truth": 1}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3919534093320935, "res": {"No": 0.6080334391052269, "Yes": 0.3919534093320935}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38148846676461984, "res": {"No": 0.618495712046888, "Yes": 0.38148846676461984}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.32347139255579, "res": {"No": 0.6764973016980619, "Yes": 0.32347139255579}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31240517370442855, "res": {"No": 0.6875631906448338, "Yes": 0.31240517370442855}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.27884606159128505, "res": {"No": 0.7211309755586238, "Yes": 0.27884606159128505}, "ground_truth": 1}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2310317573971096, "res": {"No": 0.7689471249753012, "Yes": 0.2310317573971096}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3099349011021386, "res": {"No": 0.6900446454039684, "Yes": 0.3099349011021386}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.474664329261067, "res": {"No": 0.5253133170607727, "Yes": 0.474664329261067}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.25374448064891, "res": {"No": 0.7462361305318489, "Yes": 0.25374448064891}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39999837275297195, "res": {"No": 0.5999763747247346, "Yes": 0.39999837275297195}, "ground_truth": 1}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40533856424406856, "res": {"No": 0.5946335176060843, "Yes": 0.40533856424406856}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1926296009689229, "res": {"No": 0.8073507667039302, "Yes": 0.1926296009689229}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3643206837078565, "res": {"No": 0.635663565923903, "Yes": 0.3643206837078565}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4347113201646122, "res": {"No": 0.565269794171621, "Yes": 0.4347113201646122}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44056803325657556, "res": {"No": 0.5594054252073744, "Yes": 0.44056803325657556}, "ground_truth": 1}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4426715526773703, "res": {"No": 0.5573077043950783, "Yes": 0.4426715526773703}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43127066195697955, "res": {"No": 0.5686934171475776, "Yes": 0.43127066195697955}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31402094232354566, "res": {"No": 0.6859580189463957, "Yes": 0.31402094232354566}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.26716615787512193, "res": {"No": 0.7327976738196523, "Yes": 0.26716615787512193}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4536288619296976, "res": {"No": 0.5463507282661222, "Yes": 0.4536288619296976}, "ground_truth": 1}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3416838105899977, "res": {"No": 0.6582987916969267, "Yes": 0.3416838105899977}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3181494939202588, "res": {"No": 0.6818323312305241, "Yes": 0.3181494939202588}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29430215538537224, "res": {"No": 0.7056817656358864, "Yes": 0.29430215538537224}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40938819543026445, "res": {"No": 0.5905926646000232, "Yes": 0.40938819543026445}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4586917514459178, "res": {"No": 0.5412849278083458, "Yes": 0.4586917514459178}, "ground_truth": 1}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3880386736384954, "res": {"No": 0.6119440941931596, "Yes": 0.3880386736384954}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4460505020841294, "res": {"No": 0.5539268171322277, "Yes": 0.4460505020841294}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30688226701902765, "res": {"No": 0.69310167041335, "Yes": 0.30688226701902765}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.14653769204985664, "res": {"No": 0.8534423196432472, "Yes": 0.14653769204985664}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4476356315170195, "res": {"No": 0.5523409833265249, "Yes": 0.4476356315170195}, "ground_truth": 1}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4326944103098912, "res": {"No": 0.56727961991481, "Yes": 0.4326944103098912}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46758359191594956, "res": {"No": 0.5323954706055996, "Yes": 0.46758359191594956}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2100422453842308, "res": {"No": 0.7899321130986676, "Yes": 0.2100422453842308}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2633273615775307, "res": {"No": 0.736641821630965, "Yes": 0.2633273615775307}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4170978843820854, "res": {"No": 0.5828695115895374, "Yes": 0.4170978843820854}, "ground_truth": 1}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4004313051127701, "res": {"No": 0.5995430123794162, "Yes": 0.4004313051127701}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.21463550824669383, "res": {"No": 0.7853437020625753, "Yes": 0.21463550824669383}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4872431252406535, "res": {"No": 0.5127325063091278, "Yes": 0.4872431252406535}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4355314541171487, "res": {"No": 0.5644490253506788, "Yes": 0.4355314541171487}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3677558912185291, "res": {"No": 0.6322251994888441, "Yes": 0.3677558912185291}, "ground_truth": 1}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29287651772733725, "res": {"No": 0.7071074531362795, "Yes": 0.29287651772733725}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38145469127103565, "res": {"No": 0.6185235264244604, "Yes": 0.38145469127103565}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35610305844863605, "res": {"No": 0.6438718392874445, "Yes": 0.35610305844863605}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28052565402550184, "res": {"No": 0.7194555741566702, "Yes": 0.28052565402550184}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.16621678647607707, "res": {"No": 0.8337640335915878, "Yes": 0.16621678647607707}, "ground_truth": 1}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2598164392990075, "res": {"No": 0.7401580385334956, "Yes": 0.2598164392990075}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.10187286140347301, "res": {"No": 0.898110120121948, "Yes": 0.10187286140347301}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3075149423217198, "res": {"No": 0.6924604530918002, "Yes": 0.3075149423217198}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.12833725184133835, "res": {"No": 0.8716494698564536, "Yes": 0.12833725184133835}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45039988386737306, "res": {"No": 0.5495684418155435, "Yes": 0.45039988386737306}, "ground_truth": 1}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4145505739116707, "res": {"No": 0.5854201174552989, "Yes": 0.4145505739116707}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33132497091476687, "res": {"No": 0.6686453827625785, "Yes": 0.33132497091476687}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37056891727705116, "res": {"No": 0.6294074485851167, "Yes": 0.37056891727705116}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4593700429180281, "res": {"No": 0.5405954293218926, "Yes": 0.4593700429180281}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5628468571564328, "res": {"Yes": 0.5628468571564328, "No": 0.43712661019600246}, "ground_truth": 1}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.411102748244877, "res": {"No": 0.5888730225386528, "Yes": 0.411102748244877}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44366278426984856, "res": {"No": 0.5563037616581828, "Yes": 0.44366278426984856}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4100517941775841, "res": {"No": 0.5899072251235826, "Yes": 0.4100517941775841}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.18616157491804672, "res": {"No": 0.8138046007617638, "Yes": 0.18616157491804672}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5378687003498883, "res": {"Yes": 0.5378687003498883, "No": 0.4620817756464205}, "ground_truth": 1}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2639431869346483, "res": {"No": 0.7360079195890082, "Yes": 0.2639431869346483}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.306144550902454, "res": {"No": 0.6938306142163859, "Yes": 0.306144550902454}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09253262443981043, "res": {"No": 0.9074553728274692, "Yes": 0.09253262443981043}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3503661950107135, "res": {"No": 0.6496188062797017, "Yes": 0.3503661950107135}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40753954828672345, "res": {"No": 0.5924487841275992, "Yes": 0.40753954828672345}, "ground_truth": 1}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3521982682086927, "res": {"No": 0.647791729101052, "Yes": 0.3521982682086927}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37278422946601664, "res": {"No": 0.6272023080663397, "Yes": 0.37278422946601664}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.20475615410945847, "res": {"No": 0.7952086865257915, "Yes": 0.20475615410945847}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.618153512665406, "res": {"Yes": 0.618153512665406, "No": 0.3817795390045099}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34151611917060554, "res": {"No": 0.6584465700282527, "Yes": 0.34151611917060554}, "ground_truth": 1}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3944912574625275, "res": {"No": 0.605463983502547, "Yes": 0.3944912574625275}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25568882530654086, "res": {"No": 0.7442888948412381, "Yes": 0.25568882530654086}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5586621898697286, "res": {"Yes": 0.5586621898697286, "No": 0.44131653260211434}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5252442429013047, "res": {"Yes": 0.5252442429013047, "No": 0.4747395220459333}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44894674782485705, "res": {"No": 0.551035540552069, "Yes": 0.44894674782485705}, "ground_truth": 1}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4803870047679079, "res": {"No": 0.5195926187113146, "Yes": 0.4803870047679079}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44533209243762356, "res": {"No": 0.5546530476977891, "Yes": 0.44533209243762356}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.164278421259336, "res": {"No": 0.8357063697085706, "Yes": 0.164278421259336}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38120671192452565, "res": {"No": 0.618772519877457, "Yes": 0.38120671192452565}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5282805456113172, "res": {"Yes": 0.5282805456113172, "No": 0.47169782485099426}, "ground_truth": 1}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4509206172223367, "res": {"No": 0.5490621708924319, "Yes": 0.4509206172223367}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45047445824524274, "res": {"No": 0.5494917274158557, "Yes": 0.45047445824524274}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3342970199146818, "res": {"No": 0.6656880310715552, "Yes": 0.3342970199146818}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41352541635944884, "res": {"No": 0.5864593046980897, "Yes": 0.41352541635944884}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3578572196241233, "res": {"No": 0.642124531373946, "Yes": 0.3578572196241233}, "ground_truth": 1}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42267876890795875, "res": {"No": 0.5773059733050759, "Yes": 0.42267876890795875}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.49808421989550095, "res": {"No": 0.5018959759431667, "Yes": 0.49808421989550095}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.6074424322414987, "res": {"Yes": 0.6074424322414987, "No": 0.3925348326862061}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5678029440184189, "res": {"Yes": 0.5678029440184189, "No": 0.43216525031686615}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5263580259958973, "res": {"Yes": 0.5263580259958973, "No": 0.47361494014293004}, "ground_truth": 1}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4520664484995743, "res": {"No": 0.5479151389259992, "Yes": 0.4520664484995743}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42772051458753885, "res": {"No": 0.572250698230955, "Yes": 0.42772051458753885}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.405523035254772, "res": {"No": 0.5944629060523885, "Yes": 0.405523035254772}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39684628686861956, "res": {"No": 0.6031352573670825, "Yes": 0.39684628686861956}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4324315807162737, "res": {"No": 0.5675539091734493, "Yes": 0.4324315807162737}, "ground_truth": 1}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3988269761010417, "res": {"No": 0.6011589521286822, "Yes": 0.3988269761010417}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3194603541600458, "res": {"No": 0.6805249318607746, "Yes": 0.3194603541600458}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24335677561602093, "res": {"No": 0.7566272886202787, "Yes": 0.24335677561602093}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46697438636572647, "res": {"No": 0.5330106520135228, "Yes": 0.46697438636572647}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40374602487684674, "res": {"No": 0.596234913145702, "Yes": 0.40374602487684674}, "ground_truth": 1}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4505875415256203, "res": {"No": 0.5493917455033481, "Yes": 0.4505875415256203}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3657964674957377, "res": {"No": 0.6341866926435733, "Yes": 0.3657964674957377}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17439228602387122, "res": {"No": 0.8255926722173147, "Yes": 0.17439228602387122}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.364489585484718, "res": {"No": 0.6354780936456715, "Yes": 0.364489585484718}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6233728463153136, "res": {"Yes": 0.6233728463153136, "No": 0.3765775940056254}, "ground_truth": 1}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.390077321208613, "res": {"No": 0.6098923943990874, "Yes": 0.390077321208613}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2576044783510268, "res": {"No": 0.7423711342007233, "Yes": 0.2576044783510268}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3498942895416109, "res": {"No": 0.6500877793866546, "Yes": 0.3498942895416109}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46468785889114944, "res": {"No": 0.535293331050816, "Yes": 0.46468785889114944}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3781157550312717, "res": {"No": 0.6218659648868439, "Yes": 0.3781157550312717}, "ground_truth": 1}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4487439594975224, "res": {"No": 0.5512354043176025, "Yes": 0.4487439594975224}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39361981582006733, "res": {"No": 0.60635915709368, "Yes": 0.39361981582006733}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5593912164900255, "res": {"Yes": 0.5593912164900255, "No": 0.44057618384058345}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3879678514031841, "res": {"No": 0.6120074887653423, "Yes": 0.3879678514031841}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47914541369032176, "res": {"No": 0.520827128182636, "Yes": 0.47914541369032176}, "ground_truth": 1}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3347895009594341, "res": {"No": 0.6651902426940319, "Yes": 0.3347895009594341}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5268131749654806, "res": {"Yes": 0.5268131749654806, "No": 0.4731582073442809}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4192889955343205, "res": {"No": 0.5806824088914685, "Yes": 0.4192889955343205}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.43507441219749543, "res": {"No": 0.5649062617540096, "Yes": 0.43507441219749543}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3184680573163082, "res": {"No": 0.6815081425053847, "Yes": 0.3184680573163082}, "ground_truth": 1}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38683444429540986, "res": {"No": 0.6131491077288899, "Yes": 0.38683444429540986}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4974236010025242, "res": {"No": 0.5025599714504722, "Yes": 0.4974236010025242}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44249590447531567, "res": {"No": 0.557483055406195, "Yes": 0.44249590447531567}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.42187479638433434, "res": {"No": 0.578106830230401, "Yes": 0.42187479638433434}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5249386921849631, "res": {"Yes": 0.5249386921849631, "No": 0.47503473086971787}, "ground_truth": 1}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4699245166727422, "res": {"No": 0.5300440302198873, "Yes": 0.4699245166727422}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4257807229897623, "res": {"No": 0.5741988580868241, "Yes": 0.4257807229897623}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3327627185160365, "res": {"No": 0.6672098732570185, "Yes": 0.3327627185160365}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3727139662612774, "res": {"No": 0.6272707082074842, "Yes": 0.3727139662612774}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.522045724556441, "res": {"Yes": 0.522045724556441, "No": 0.4779253166574647}, "ground_truth": 1}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4115868393095625, "res": {"No": 0.5883913683852684, "Yes": 0.4115868393095625}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.6116538078323291, "res": {"Yes": 0.6116538078323291, "No": 0.3883268760446103}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31998588293167907, "res": {"No": 0.6799965189610596, "Yes": 0.31998588293167907}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3808543158788621, "res": {"No": 0.6191268685677211, "Yes": 0.3808543158788621}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4037261247257238, "res": {"No": 0.5962544341963163, "Yes": 0.4037261247257238}, "ground_truth": 1}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3604828846799981, "res": {"No": 0.6394951960705354, "Yes": 0.3604828846799981}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41163847611740495, "res": {"No": 0.5883372565194903, "Yes": 0.41163847611740495}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3878917395775531, "res": {"No": 0.612088150547378, "Yes": 0.3878917395775531}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.6153214283406429, "res": {"Yes": 0.6153214283406429, "No": 0.3846340446939651}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4921456350951955, "res": {"No": 0.5078299914071629, "Yes": 0.4921456350951955}, "ground_truth": 1}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5458446049849219, "res": {"Yes": 0.5458446049849219, "No": 0.4541301718051145}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4926588659144326, "res": {"No": 0.5072989521270179, "Yes": 0.4926588659144326}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.26873274668028757, "res": {"No": 0.7312412838708231, "Yes": 0.26873274668028757}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4760782059651892, "res": {"No": 0.5238598721422209, "Yes": 0.4760782059651892}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39711818793203035, "res": {"No": 0.6028547440978006, "Yes": 0.39711818793203035}, "ground_truth": 1}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5173914160069175, "res": {"Yes": 0.5173914160069175, "No": 0.4825811431552464}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4609349279823319, "res": {"No": 0.5390293806521564, "Yes": 0.4609349279823319}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4207473854302701, "res": {"No": 0.5792271523222542, "Yes": 0.4207473854302701}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.43127896830991874, "res": {"No": 0.5687076346607244, "Yes": 0.43127896830991874}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43247779282405796, "res": {"No": 0.5674987342644953, "Yes": 0.43247779282405796}, "ground_truth": 1}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5415189054710182, "res": {"Yes": 0.5415189054710182, "No": 0.45845698888798175}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47274646452566793, "res": {"No": 0.527236929435873, "Yes": 0.47274646452566793}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5067988650404864, "res": {"Yes": 0.5067988650404864, "No": 0.49318150914030334}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4088218632464912, "res": {"No": 0.5911440215369838, "Yes": 0.4088218632464912}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5898814467411064, "res": {"Yes": 0.5898814467411064, "No": 0.41010114277974125}, "ground_truth": 1}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5226984597429758, "res": {"Yes": 0.5226984597429758, "No": 0.4772822948707058}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44619797737271344, "res": {"No": 0.5537856004154417, "Yes": 0.44619797737271344}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4234056821852612, "res": {"No": 0.5765729891009841, "Yes": 0.4234056821852612}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5097217450997785, "res": {"Yes": 0.5097217450997785, "No": 0.49026474395396336}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44409760927583514, "res": {"No": 0.555881187007272, "Yes": 0.44409760927583514}, "ground_truth": 1}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4502354548754056, "res": {"No": 0.5497451564749466, "Yes": 0.4502354548754056}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.338802444858817, "res": {"No": 0.6611804902827568, "Yes": 0.338802444858817}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38307451216115407, "res": {"No": 0.6168936538383233, "Yes": 0.38307451216115407}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30940570564948555, "res": {"No": 0.6905397367480288, "Yes": 0.30940570564948555}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44995054601563694, "res": {"No": 0.5500137835883041, "Yes": 0.44995054601563694}, "ground_truth": 1}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4725524419278429, "res": {"No": 0.527418762621939, "Yes": 0.4725524419278429}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3793779846187423, "res": {"No": 0.6205215406202853, "Yes": 0.3793779846187423}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4057230885448458, "res": {"No": 0.5942356863192241, "Yes": 0.4057230885448458}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5951830343415296, "res": {"Yes": 0.5951830343415296, "No": 0.404777882740633}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47661740304144645, "res": {"No": 0.5233480231439628, "Yes": 0.47661740304144645}, "ground_truth": 1}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44955143701195843, "res": {"No": 0.550394464304435, "Yes": 0.44955143701195843}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37321340185596846, "res": {"No": 0.6267422421012494, "Yes": 0.37321340185596846}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4944518024051267, "res": {"No": 0.5055251646800246, "Yes": 0.4944518024051267}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5046974187870497, "res": {"Yes": 0.5046974187870497, "No": 0.49528317959446644}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46453275803331584, "res": {"No": 0.5354474909595998, "Yes": 0.46453275803331584}, "ground_truth": 1}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4802558482000071, "res": {"No": 0.5197173618911374, "Yes": 0.4802558482000071}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.294182104598001, "res": {"No": 0.7057990385182297, "Yes": 0.294182104598001}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4747509918913411, "res": {"No": 0.5252178763021182, "Yes": 0.4747509918913411}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.49374428871164217, "res": {"No": 0.5062288192273017, "Yes": 0.49374428871164217}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5610131687653039, "res": {"Yes": 0.5610131687653039, "No": 0.438947237964064}, "ground_truth": 1}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5185045375754146, "res": {"Yes": 0.5185045375754146, "No": 0.48146107121247655}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4371590199642976, "res": {"No": 0.5627930515688367, "Yes": 0.4371590199642976}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22652188867195305, "res": {"No": 0.7734620058156985, "Yes": 0.22652188867195305}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30749117233540013, "res": {"No": 0.692469039654655, "Yes": 0.30749117233540013}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4168885579593857, "res": {"No": 0.5830727877976644, "Yes": 0.4168885579593857}, "ground_truth": 1}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37180013522174876, "res": {"No": 0.6281770157619803, "Yes": 0.37180013522174876}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2563408435344168, "res": {"No": 0.7436422548835891, "Yes": 0.2563408435344168}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4642608537112453, "res": {"No": 0.5357141459319793, "Yes": 0.4642608537112453}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4508550175539006, "res": {"No": 0.5491247291091228, "Yes": 0.4508550175539006}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5920929418811016, "res": {"Yes": 0.5920929418811016, "No": 0.4078887146475714}, "ground_truth": 1}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4630040396444258, "res": {"No": 0.5369763104846889, "Yes": 0.4630040396444258}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4420231158362001, "res": {"No": 0.5579623176356844, "Yes": 0.4420231158362001}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23851983654879533, "res": {"No": 0.7614618929401404, "Yes": 0.23851983654879533}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40040217479495127, "res": {"No": 0.5995759941476825, "Yes": 0.40040217479495127}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3022905312648643, "res": {"No": 0.6976842796264835, "Yes": 0.3022905312648643}, "ground_truth": 1}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.343800338998446, "res": {"No": 0.6561810088330783, "Yes": 0.343800338998446}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3838103225966113, "res": {"No": 0.6161705604046948, "Yes": 0.3838103225966113}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21507480094274364, "res": {"No": 0.7849048647070451, "Yes": 0.21507480094274364}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4731556381021905, "res": {"No": 0.5268163358540131, "Yes": 0.4731556381021905}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49760980160098, "res": {"No": 0.502366573339315, "Yes": 0.49760980160098}, "ground_truth": 1}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38796121720964766, "res": {"No": 0.6120215161377378, "Yes": 0.38796121720964766}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38764337446577146, "res": {"No": 0.6123371068093993, "Yes": 0.38764337446577146}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4912481164522056, "res": {"No": 0.5087257750339119, "Yes": 0.4912481164522056}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5950632360548188, "res": {"Yes": 0.5950632360548188, "No": 0.4049071894455618}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5304543051904391, "res": {"Yes": 0.5304543051904391, "No": 0.46952550878467236}, "ground_truth": 1}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3681763292503416, "res": {"No": 0.6318093447791288, "Yes": 0.3681763292503416}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31992326782147773, "res": {"No": 0.6800580750319417, "Yes": 0.31992326782147773}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3741197558552738, "res": {"No": 0.6258602347389775, "Yes": 0.3741197558552738}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4897715603261918, "res": {"No": 0.5102103435294604, "Yes": 0.4897715603261918}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43852425286467334, "res": {"No": 0.561453680857823, "Yes": 0.43852425286467334}, "ground_truth": 1}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4679235926321316, "res": {"No": 0.5320504304788478, "Yes": 0.4679235926321316}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5016178520928476, "res": {"Yes": 0.5016178520928476, "No": 0.498364949512571}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.37559834798263286, "res": {"No": 0.6243829480816752, "Yes": 0.37559834798263286}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32951681317948295, "res": {"No": 0.6704587361923783, "Yes": 0.32951681317948295}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5968579402116304, "res": {"Yes": 0.5968579402116304, "No": 0.40310471986870183}, "ground_truth": 1}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4194732590172473, "res": {"No": 0.5805022685079853, "Yes": 0.4194732590172473}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.491419351170043, "res": {"No": 0.5085431040632559, "Yes": 0.491419351170043}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3073030453106723, "res": {"No": 0.6926752737899599, "Yes": 0.3073030453106723}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.45025155558315094, "res": {"No": 0.5497291701182391, "Yes": 0.45025155558315094}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34905537560886046, "res": {"No": 0.6509221364464409, "Yes": 0.34905537560886046}, "ground_truth": 1}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4162402755510083, "res": {"No": 0.5837371693154435, "Yes": 0.4162402755510083}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.213056093111794, "res": {"No": 0.7869221306470733, "Yes": 0.213056093111794}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5479088708126635, "res": {"Yes": 0.5479088708126635, "No": 0.4520707431512346}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.636621125370308, "res": {"Yes": 0.636621125370308, "No": 0.3633617806954228}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5269175996849067, "res": {"Yes": 0.5269175996849067, "No": 0.47305976282701945}, "ground_truth": 1}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5430144380451196, "res": {"Yes": 0.5430144380451196, "No": 0.45695884812911525}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5441388639971726, "res": {"Yes": 0.5441388639971726, "No": 0.4558311377508697}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38735755413626155, "res": {"No": 0.6126189814498936, "Yes": 0.38735755413626155}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.59888659060647, "res": {"Yes": 0.59888659060647, "No": 0.4010975958964835}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4154715494813867, "res": {"No": 0.5845101984858949, "Yes": 0.4154715494813867}, "ground_truth": 1}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4338274519882455, "res": {"No": 0.5661534072266882, "Yes": 0.4338274519882455}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4166234344737738, "res": {"No": 0.5833658846689198, "Yes": 0.4166234344737738}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2899662042098555, "res": {"No": 0.7100200144343025, "Yes": 0.2899662042098555}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4664082758497694, "res": {"No": 0.5335803021891345, "Yes": 0.4664082758497694}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4996357730057363, "res": {"No": 0.5003452894499159, "Yes": 0.4996357730057363}, "ground_truth": 1}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4319526384352185, "res": {"No": 0.5680239191318568, "Yes": 0.4319526384352185}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3829590172867943, "res": {"No": 0.6170277132247204, "Yes": 0.3829590172867943}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4276033779811839, "res": {"No": 0.5723559161936896, "Yes": 0.4276033779811839}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4838444055918038, "res": {"No": 0.5161206102598704, "Yes": 0.4838444055918038}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6135928693523229, "res": {"Yes": 0.6135928693523229, "No": 0.3863553836712025}, "ground_truth": 1}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4686370737753836, "res": {"No": 0.531333231476789, "Yes": 0.4686370737753836}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5409391409967687, "res": {"Yes": 0.5409391409967687, "No": 0.45903129060152187}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23898217772963756, "res": {"No": 0.7610005183328399, "Yes": 0.23898217772963756}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4009333159071163, "res": {"No": 0.5990513489999549, "Yes": 0.4009333159071163}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3463995504748837, "res": {"No": 0.6535752195109681, "Yes": 0.3463995504748837}, "ground_truth": 1}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47783376927369875, "res": {"No": 0.5221359105240386, "Yes": 0.47783376927369875}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46669884539793377, "res": {"No": 0.5332847805687689, "Yes": 0.46669884539793377}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3119853335905963, "res": {"No": 0.6879986178429582, "Yes": 0.3119853335905963}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2760100502441164, "res": {"No": 0.7239760986903425, "Yes": 0.2760100502441164}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.412395639651592, "res": {"No": 0.5875808061728749, "Yes": 0.412395639651592}, "ground_truth": 1}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5458745726763412, "res": {"Yes": 0.5458745726763412, "No": 0.4541040146605515}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.561656160713417, "res": {"Yes": 0.561656160713417, "No": 0.43832190308085783}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24464318934388019, "res": {"No": 0.7553369937345895, "Yes": 0.24464318934388019}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2828599726728354, "res": {"No": 0.7171097010615445, "Yes": 0.2828599726728354}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45548205924783247, "res": {"No": 0.544489102817712, "Yes": 0.45548205924783247}, "ground_truth": 1}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3497072161034601, "res": {"No": 0.6502634698428689, "Yes": 0.3497072161034601}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2644003624938663, "res": {"No": 0.7355733395820896, "Yes": 0.2644003624938663}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1433120583637232, "res": {"No": 0.8566719949451442, "Yes": 0.1433120583637232}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38203058810804896, "res": {"No": 0.6179409353248085, "Yes": 0.38203058810804896}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4242010337136891, "res": {"No": 0.5757651656167154, "Yes": 0.4242010337136891}, "ground_truth": 1}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4155406481454601, "res": {"No": 0.5844323761341045, "Yes": 0.4155406481454601}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41920231230322497, "res": {"No": 0.5807705631718788, "Yes": 0.41920231230322497}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.408617348134463, "res": {"No": 0.5913624009080662, "Yes": 0.408617348134463}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.471902397375968, "res": {"No": 0.5280703106666765, "Yes": 0.471902397375968}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33882602632960485, "res": {"No": 0.661148337857307, "Yes": 0.33882602632960485}, "ground_truth": 1}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4293339348237216, "res": {"No": 0.5706375354182122, "Yes": 0.4293339348237216}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.26736460241076104, "res": {"No": 0.7326122188716729, "Yes": 0.26736460241076104}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24659111966281605, "res": {"No": 0.7533816502052768, "Yes": 0.24659111966281605}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3555348868338481, "res": {"No": 0.6444404229843591, "Yes": 0.3555348868338481}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2911818188140756, "res": {"No": 0.7087932307734934, "Yes": 0.2911818188140756}, "ground_truth": 1}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24771113250414545, "res": {"No": 0.7522673045608306, "Yes": 0.24771113250414545}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3193556522175381, "res": {"No": 0.6806160194130978, "Yes": 0.3193556522175381}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5536970128813931, "res": {"Yes": 0.5536970128813931, "No": 0.44627468539788967}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2963990768814823, "res": {"No": 0.7035749565908399, "Yes": 0.2963990768814823}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49822215851271023, "res": {"No": 0.5017602063539346, "Yes": 0.49822215851271023}, "ground_truth": 1}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4814649421750503, "res": {"No": 0.5185037598191916, "Yes": 0.4814649421750503}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3698077658923075, "res": {"No": 0.6301750569627086, "Yes": 0.3698077658923075}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24884817685134272, "res": {"No": 0.7511332596623157, "Yes": 0.24884817685134272}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2677708504117298, "res": {"No": 0.7322140644494087, "Yes": 0.2677708504117298}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38483130867495996, "res": {"No": 0.6151510448183736, "Yes": 0.38483130867495996}, "ground_truth": 1}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36857800767503457, "res": {"No": 0.6314065810269929, "Yes": 0.36857800767503457}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45323841024978717, "res": {"No": 0.546741153342333, "Yes": 0.45323841024978717}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1737237821541025, "res": {"No": 0.826256476021048, "Yes": 0.1737237821541025}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4359691478276629, "res": {"No": 0.563998774702351, "Yes": 0.4359691478276629}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41635600643345877, "res": {"No": 0.5836255636017029, "Yes": 0.41635600643345877}, "ground_truth": 1}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4535933441803466, "res": {"No": 0.5463735935225522, "Yes": 0.4535933441803466}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4971556613557269, "res": {"No": 0.502811646122242, "Yes": 0.4971556613557269}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3540085194128334, "res": {"No": 0.6459791185163964, "Yes": 0.3540085194128334}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5432999594832191, "res": {"Yes": 0.5432999594832191, "No": 0.4566674789988773}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3625093543198774, "res": {"No": 0.6374663867540301, "Yes": 0.3625093543198774}, "ground_truth": 1}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5441670239120159, "res": {"Yes": 0.5441670239120159, "No": 0.45581210719813414}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.41898211287795145, "res": {"No": 0.5809938542901656, "Yes": 0.41898211287795145}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42875276566164705, "res": {"No": 0.5712210022430051, "Yes": 0.42875276566164705}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3516846097696684, "res": {"No": 0.6482958423365416, "Yes": 0.3516846097696684}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30645008245050503, "res": {"No": 0.6935276984072212, "Yes": 0.30645008245050503}, "ground_truth": 1}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6005415425028763, "res": {"Yes": 0.6005415425028763, "No": 0.39943848723961556}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3096598320384321, "res": {"No": 0.6903217539549171, "Yes": 0.3096598320384321}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2643676053183022, "res": {"No": 0.7356126864523465, "Yes": 0.2643676053183022}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37611334182218803, "res": {"No": 0.6238762796889484, "Yes": 0.37611334182218803}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31310186315398864, "res": {"No": 0.6868853539110689, "Yes": 0.31310186315398864}, "ground_truth": 1}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38616327759707153, "res": {"No": 0.6138237648297385, "Yes": 0.38616327759707153}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.28378785766566933, "res": {"No": 0.7161980601160589, "Yes": 0.28378785766566933}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2655662872422611, "res": {"No": 0.7344165312920052, "Yes": 0.2655662872422611}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40595983209559566, "res": {"No": 0.5940167864511665, "Yes": 0.40595983209559566}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2914190247344191, "res": {"No": 0.7085620105282548, "Yes": 0.2914190247344191}, "ground_truth": 1}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5823602479555535, "res": {"Yes": 0.5823602479555535, "No": 0.41758640417328935}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2795392966573846, "res": {"No": 0.7204460607365607, "Yes": 0.2795392966573846}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3523055289109754, "res": {"No": 0.6476776047303834, "Yes": 0.3523055289109754}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4854739979235683, "res": {"No": 0.5145045912682684, "Yes": 0.4854739979235683}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3552728120471461, "res": {"No": 0.644712608305576, "Yes": 0.3552728120471461}, "ground_truth": 1}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.23383447166196186, "res": {"No": 0.7661479972543024, "Yes": 0.23383447166196186}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31637163382487593, "res": {"No": 0.6836006342038375, "Yes": 0.31637163382487593}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4446612289489219, "res": {"No": 0.5552962077218591, "Yes": 0.4446612289489219}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36157991384571764, "res": {"No": 0.6383996060093514, "Yes": 0.36157991384571764}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4419848824902963, "res": {"No": 0.557973616487017, "Yes": 0.4419848824902963}, "ground_truth": 1}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39735446011823167, "res": {"No": 0.6026246180903543, "Yes": 0.39735446011823167}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.22482503533834652, "res": {"No": 0.7751275907139922, "Yes": 0.22482503533834652}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5658661289750556, "res": {"Yes": 0.5658661289750556, "No": 0.43410300688072123}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4033359667292456, "res": {"No": 0.5966424656506971, "Yes": 0.4033359667292456}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5834384423959048, "res": {"Yes": 0.5834384423959048, "No": 0.4165401764344129}, "ground_truth": 1}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5023091259828903, "res": {"Yes": 0.5023091259828903, "No": 0.49766393453943525}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46935686204062665, "res": {"No": 0.530621934013314, "Yes": 0.46935686204062665}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35995314816052065, "res": {"No": 0.6400246869583058, "Yes": 0.35995314816052065}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33146286392181507, "res": {"No": 0.6685180781156701, "Yes": 0.33146286392181507}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.27178819795689285, "res": {"No": 0.7281938054043015, "Yes": 0.27178819795689285}, "ground_truth": 1}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37285827176614567, "res": {"No": 0.6271259194773767, "Yes": 0.37285827176614567}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32219729618603543, "res": {"No": 0.6777847088751523, "Yes": 0.32219729618603543}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3337034014964548, "res": {"No": 0.6662789381992017, "Yes": 0.3337034014964548}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4268653521607696, "res": {"No": 0.5731100049372796, "Yes": 0.4268653521607696}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3750530711344368, "res": {"No": 0.6249171112755033, "Yes": 0.3750530711344368}, "ground_truth": 1}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33151984728570305, "res": {"No": 0.6684545318075683, "Yes": 0.33151984728570305}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4746267847975255, "res": {"No": 0.5253429140467951, "Yes": 0.4746267847975255}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29253288244890246, "res": {"No": 0.7074535042491862, "Yes": 0.29253288244890246}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3221913678103268, "res": {"No": 0.6777955399613402, "Yes": 0.3221913678103268}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5480209844364644, "res": {"Yes": 0.5480209844364644, "No": 0.4519571972425182}, "ground_truth": 1}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40157145151357376, "res": {"No": 0.5984104794651663, "Yes": 0.40157145151357376}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44988656759696694, "res": {"No": 0.5500845419125027, "Yes": 0.44988656759696694}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44332102942334445, "res": {"No": 0.5566561247464454, "Yes": 0.44332102942334445}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40152451055464433, "res": {"No": 0.5984501194881892, "Yes": 0.40152451055464433}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5116449763632339, "res": {"Yes": 0.5116449763632339, "No": 0.48833052356863366}, "ground_truth": 1}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3942306243261772, "res": {"No": 0.60574641353781, "Yes": 0.3942306243261772}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4217147674459954, "res": {"No": 0.5782679710579175, "Yes": 0.4217147674459954}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06975557731304939, "res": {"No": 0.9302340014705394, "Yes": 0.06975557731304939}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4941891946464611, "res": {"No": 0.5057821682069986, "Yes": 0.4941891946464611}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49309159062204716, "res": {"No": 0.5068823974447324, "Yes": 0.49309159062204716}, "ground_truth": 1}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2807509223673415, "res": {"No": 0.7192331313042479, "Yes": 0.2807509223673415}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4788844511517425, "res": {"No": 0.5210818749288204, "Yes": 0.4788844511517425}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5062097094500752, "res": {"Yes": 0.5062097094500752, "No": 0.4937783977452188}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4563974720047617, "res": {"No": 0.5435786895066421, "Yes": 0.4563974720047617}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49113838388057485, "res": {"No": 0.5088411467710118, "Yes": 0.49113838388057485}, "ground_truth": 1}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4885846191915356, "res": {"No": 0.5113988320276848, "Yes": 0.4885846191915356}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4070573680633276, "res": {"No": 0.592934109491701, "Yes": 0.4070573680633276}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29593593023380316, "res": {"No": 0.704048945769567, "Yes": 0.29593593023380316}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46423923822910884, "res": {"No": 0.5357372196371332, "Yes": 0.46423923822910884}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49797846294054826, "res": {"No": 0.5019999996437847, "Yes": 0.49797846294054826}, "ground_truth": 1}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38058623170914696, "res": {"No": 0.6193876811503182, "Yes": 0.38058623170914696}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2624869270250351, "res": {"No": 0.7374866961865152, "Yes": 0.2624869270250351}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2674598278995715, "res": {"No": 0.7325139455343428, "Yes": 0.2674598278995715}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.49254507485977916, "res": {"No": 0.50743337879177, "Yes": 0.49254507485977916}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.36687719700074617, "res": {"No": 0.6330986017571788, "Yes": 0.36687719700074617}, "ground_truth": 1}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4720297810226696, "res": {"No": 0.5279398881277376, "Yes": 0.4720297810226696}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4488165272627188, "res": {"No": 0.5511626295115051, "Yes": 0.4488165272627188}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31450607080228804, "res": {"No": 0.685457568849389, "Yes": 0.31450607080228804}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.43465808432919456, "res": {"No": 0.5653115974185561, "Yes": 0.43465808432919456}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4440860184794936, "res": {"No": 0.5558794081903197, "Yes": 0.4440860184794936}, "ground_truth": 1}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3957113451153979, "res": {"No": 0.6042515163059732, "Yes": 0.3957113451153979}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5044360885341681, "res": {"Yes": 0.5044360885341681, "No": 0.4955453825507463}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31976005262580737, "res": {"No": 0.6802246960677404, "Yes": 0.31976005262580737}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4974661524372529, "res": {"No": 0.5025147631889385, "Yes": 0.4974661524372529}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5095087768853092, "res": {"Yes": 0.5095087768853092, "No": 0.49047509793572536}, "ground_truth": 1}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42392961071347235, "res": {"No": 0.5760545315140393, "Yes": 0.42392961071347235}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47252217591321694, "res": {"No": 0.527460414526227, "Yes": 0.47252217591321694}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36791052829763393, "res": {"No": 0.632058111807396, "Yes": 0.36791052829763393}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37556528175967296, "res": {"No": 0.6244030223161475, "Yes": 0.37556528175967296}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42085460973188354, "res": {"No": 0.5791228776528375, "Yes": 0.42085460973188354}, "ground_truth": 1}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46535021794691783, "res": {"No": 0.5346186606869308, "Yes": 0.46535021794691783}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31906019338528724, "res": {"No": 0.6809143170897494, "Yes": 0.31906019338528724}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3246349041349551, "res": {"No": 0.6753491720047696, "Yes": 0.3246349041349551}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4534730218276013, "res": {"No": 0.5465110784135566, "Yes": 0.4534730218276013}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3369416193456445, "res": {"No": 0.6630438315807405, "Yes": 0.3369416193456445}, "ground_truth": 1}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6060765807635528, "res": {"Yes": 0.6060765807635528, "No": 0.3939049273279965}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47969146164342436, "res": {"No": 0.5202802345353036, "Yes": 0.47969146164342436}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4647465387317654, "res": {"No": 0.5352343128618668, "Yes": 0.4647465387317654}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.45966346273540826, "res": {"No": 0.5403117182248556, "Yes": 0.45966346273540826}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48989083422213875, "res": {"No": 0.5100884178280586, "Yes": 0.48989083422213875}, "ground_truth": 1}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37570643444441104, "res": {"No": 0.6242744147839682, "Yes": 0.37570643444441104}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3660370943622052, "res": {"No": 0.6339481121048968, "Yes": 0.3660370943622052}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5758654438517814, "res": {"Yes": 0.5758654438517814, "No": 0.424102588078822}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37086695223746774, "res": {"No": 0.6291199946294479, "Yes": 0.37086695223746774}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3278422275136763, "res": {"No": 0.6721422092095338, "Yes": 0.3278422275136763}, "ground_truth": 1}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3182742011417101, "res": {"No": 0.6817145207828033, "Yes": 0.3182742011417101}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4984563031960197, "res": {"No": 0.5015270674778743, "Yes": 0.4984563031960197}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22694667522493328, "res": {"No": 0.7730228659833055, "Yes": 0.22694667522493328}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38366174000714753, "res": {"No": 0.6163080988601045, "Yes": 0.38366174000714753}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4595805882882128, "res": {"No": 0.5397559964330111, "Yes": 0.4595805882882128}, "ground_truth": 1}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4488273887541021, "res": {"No": 0.5511381363884907, "Yes": 0.4488273887541021}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.34107601795118003, "res": {"No": 0.6588849120627405, "Yes": 0.34107601795118003}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4600877255334415, "res": {"No": 0.5398941916563318, "Yes": 0.4600877255334415}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4693511077607719, "res": {"No": 0.5306285668289442, "Yes": 0.4693511077607719}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4107089771867711, "res": {"No": 0.5892668923696316, "Yes": 0.4107089771867711}, "ground_truth": 1}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49261832175818276, "res": {"No": 0.5073636368669786, "Yes": 0.49261832175818276}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4431915985805755, "res": {"No": 0.5567866202355598, "Yes": 0.4431915985805755}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.25487098577266804, "res": {"No": 0.7451032198988287, "Yes": 0.25487098577266804}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30142061429175465, "res": {"No": 0.698540514374068, "Yes": 0.30142061429175465}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38184894901573735, "res": {"No": 0.6181314944294319, "Yes": 0.38184894901573735}, "ground_truth": 1}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4017437103976779, "res": {"No": 0.5982334001051601, "Yes": 0.4017437103976779}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37864482139926875, "res": {"No": 0.621321914825716, "Yes": 0.37864482139926875}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3116178131186339, "res": {"No": 0.6883648337396958, "Yes": 0.3116178131186339}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4783124973271884, "res": {"No": 0.5216650623751099, "Yes": 0.4783124973271884}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43418802559689457, "res": {"No": 0.5657934820864233, "Yes": 0.43418802559689457}, "ground_truth": 1}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3737211903215405, "res": {"No": 0.6262652472332512, "Yes": 0.3737211903215405}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3708392865947721, "res": {"No": 0.6291486454063766, "Yes": 0.3708392865947721}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40731631852759403, "res": {"No": 0.5926643265722311, "Yes": 0.40731631852759403}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3971849451386921, "res": {"No": 0.6027973851977927, "Yes": 0.3971849451386921}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43566019881868234, "res": {"No": 0.5643176651276673, "Yes": 0.43566019881868234}, "ground_truth": 1}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4942669614343803, "res": {"No": 0.5057064431122537, "Yes": 0.4942669614343803}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47196365428263465, "res": {"No": 0.5280210123237551, "Yes": 0.47196365428263465}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24446451830622312, "res": {"No": 0.7555227992709966, "Yes": 0.24446451830622312}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30937659194240535, "res": {"No": 0.6906025718173137, "Yes": 0.30937659194240535}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4018615711654533, "res": {"No": 0.5981145070505384, "Yes": 0.4018615711654533}, "ground_truth": 1}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.30489765989779943, "res": {"No": 0.6950880714048512, "Yes": 0.30489765989779943}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.26788339136627143, "res": {"No": 0.7321041454031926, "Yes": 0.26788339136627143}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3877226323787151, "res": {"No": 0.6122544101443852, "Yes": 0.3877226323787151}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3308230628013362, "res": {"No": 0.6691520935994812, "Yes": 0.3308230628013362}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4945087516985718, "res": {"No": 0.5054686299070902, "Yes": 0.4945087516985718}, "ground_truth": 1}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3554066329850641, "res": {"No": 0.644571728206686, "Yes": 0.3554066329850641}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.459697276823394, "res": {"No": 0.5402813805736147, "Yes": 0.459697276823394}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3662202685377139, "res": {"No": 0.6337574935529386, "Yes": 0.3662202685377139}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5089270972990192, "res": {"Yes": 0.5089270972990192, "No": 0.49105086099719547}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3265306370190525, "res": {"No": 0.6734451006268553, "Yes": 0.3265306370190525}, "ground_truth": 1}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44952938705475265, "res": {"No": 0.5504367637443519, "Yes": 0.44952938705475265}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39214023139986715, "res": {"No": 0.6078326813725686, "Yes": 0.39214023139986715}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4636061740417795, "res": {"No": 0.5363727449074772, "Yes": 0.4636061740417795}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5276451212994419, "res": {"Yes": 0.5276451212994419, "No": 0.4723325435735621}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46432460110261814, "res": {"No": 0.5356548831541957, "Yes": 0.46432460110261814}, "ground_truth": 1}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44176908745020504, "res": {"No": 0.5582099125365813, "Yes": 0.44176908745020504}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.452332631229034, "res": {"No": 0.5476504119492657, "Yes": 0.452332631229034}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3925926573257409, "res": {"No": 0.6073812962374147, "Yes": 0.3925926573257409}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2534179948504074, "res": {"No": 0.7465622173977842, "Yes": 0.2534179948504074}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3439689800320926, "res": {"No": 0.6560147536308637, "Yes": 0.3439689800320926}, "ground_truth": 1}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3391314800634417, "res": {"No": 0.6608425085003664, "Yes": 0.3391314800634417}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47445000501131623, "res": {"No": 0.5255209934428536, "Yes": 0.47445000501131623}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22805311879308673, "res": {"No": 0.7719279826744205, "Yes": 0.22805311879308673}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4872195188831042, "res": {"No": 0.5127565540275884, "Yes": 0.4872195188831042}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4395059604765274, "res": {"No": 0.5604708415698604, "Yes": 0.4395059604765274}, "ground_truth": 1}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.441621322737671, "res": {"No": 0.5583511016760759, "Yes": 0.441621322737671}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2086921248330036, "res": {"No": 0.7912772871411711, "Yes": 0.2086921248330036}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23150183012026285, "res": {"No": 0.768483259181021, "Yes": 0.23150183012026285}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.49660295274672633, "res": {"No": 0.5033747280821933, "Yes": 0.49660295274672633}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.464281931632469, "res": {"No": 0.5356955623305913, "Yes": 0.464281931632469}, "ground_truth": 1}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3743730944781879, "res": {"No": 0.6256059368164173, "Yes": 0.3743730944781879}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.507774996397127, "res": {"Yes": 0.507774996397127, "No": 0.4921955904123471}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28829598525953615, "res": {"No": 0.7116813570327384, "Yes": 0.28829598525953615}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35293894406405824, "res": {"No": 0.6470353688860621, "Yes": 0.35293894406405824}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5299407029084293, "res": {"Yes": 0.5299407029084293, "No": 0.4700273944089993}, "ground_truth": 1}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.23283936122263796, "res": {"No": 0.7671409396981791, "Yes": 0.23283936122263796}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1400329957798768, "res": {"No": 0.8599521507029281, "Yes": 0.1400329957798768}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40093496374643095, "res": {"No": 0.5990549552999308, "Yes": 0.40093496374643095}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41968655245773573, "res": {"No": 0.5802980663191906, "Yes": 0.41968655245773573}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38229286569092547, "res": {"No": 0.6176979654726817, "Yes": 0.38229286569092547}, "ground_truth": 1}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5436270701629156, "res": {"Yes": 0.5436270701629156, "No": 0.4563553804070026}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45887939474269757, "res": {"No": 0.5411057759126554, "Yes": 0.45887939474269757}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21219031388571205, "res": {"No": 0.7877888278504516, "Yes": 0.21219031388571205}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4664129399558484, "res": {"No": 0.5335499956886829, "Yes": 0.4664129399558484}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31170835124337076, "res": {"No": 0.6882707406983295, "Yes": 0.31170835124337076}, "ground_truth": 1}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3956833020460875, "res": {"No": 0.604293966466038, "Yes": 0.3956833020460875}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43629749514456806, "res": {"No": 0.5636780070504328, "Yes": 0.43629749514456806}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4260362680794167, "res": {"No": 0.5739487341378148, "Yes": 0.4260362680794167}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.25217724135175973, "res": {"No": 0.7478016350209641, "Yes": 0.25217724135175973}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3345335518365207, "res": {"No": 0.6654478209539854, "Yes": 0.3345335518365207}, "ground_truth": 1}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3314794043314046, "res": {"No": 0.6684927150209273, "Yes": 0.3314794043314046}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.16641729827808038, "res": {"No": 0.8335590612279586, "Yes": 0.16641729827808038}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3924671262674142, "res": {"No": 0.6075126019694216, "Yes": 0.3924671262674142}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2598337956168472, "res": {"No": 0.7401368851190381, "Yes": 0.2598337956168472}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3618285493247189, "res": {"No": 0.6381469582993751, "Yes": 0.3618285493247189}, "ground_truth": 1}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4580763800977216, "res": {"No": 0.5419028159419326, "Yes": 0.4580763800977216}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4031958157028231, "res": {"No": 0.5967867691459192, "Yes": 0.4031958157028231}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4687413290009105, "res": {"No": 0.5312325533689761, "Yes": 0.4687413290009105}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32087867076387977, "res": {"No": 0.6791071488298772, "Yes": 0.32087867076387977}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4997471292451588, "res": {"No": 0.500227772144372, "Yes": 0.4997471292451588}, "ground_truth": 1}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4741869954096885, "res": {"No": 0.5257929893904235, "Yes": 0.4741869954096885}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3952807527895951, "res": {"No": 0.6047025468221491, "Yes": 0.3952807527895951}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4249265706186431, "res": {"No": 0.5750440686449012, "Yes": 0.4249265706186431}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4554126445170664, "res": {"No": 0.544562940546139, "Yes": 0.4554126445170664}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5525426577352882, "res": {"Yes": 0.5525426577352882, "No": 0.44742939511255847}, "ground_truth": 1}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4362325091099713, "res": {"No": 0.5637391863443724, "Yes": 0.4362325091099713}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29722795777129113, "res": {"No": 0.7027355159063996, "Yes": 0.29722795777129113}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10448375230505431, "res": {"No": 0.895504278244787, "Yes": 0.10448375230505431}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4139544232438535, "res": {"No": 0.5860183593189947, "Yes": 0.4139544232438535}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44937556194685624, "res": {"No": 0.5505969641506302, "Yes": 0.44937556194685624}, "ground_truth": 1}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47104167826277643, "res": {"No": 0.5289370988284638, "Yes": 0.47104167826277643}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35946498705978835, "res": {"No": 0.6405149863671676, "Yes": 0.35946498705978835}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4031604731067072, "res": {"No": 0.596819808175971, "Yes": 0.4031604731067072}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4294692650308345, "res": {"No": 0.5705136976868816, "Yes": 0.4294692650308345}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4377261029238337, "res": {"No": 0.562251933099063, "Yes": 0.4377261029238337}, "ground_truth": 1}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4755262944489224, "res": {"No": 0.5244465672628436, "Yes": 0.4755262944489224}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4260779790717814, "res": {"No": 0.5739031070271525, "Yes": 0.4260779790717814}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1712624335164088, "res": {"No": 0.8287276385399468, "Yes": 0.1712624335164088}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3322413222047494, "res": {"No": 0.6677443421729221, "Yes": 0.3322413222047494}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5719134787328427, "res": {"Yes": 0.5719134787328427, "No": 0.42806816588302593}, "ground_truth": 1}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.18029589800041057, "res": {"No": 0.819686269585185, "Yes": 0.18029589800041057}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.30219538518240124, "res": {"No": 0.6977867071967323, "Yes": 0.30219538518240124}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.333345729786539, "res": {"No": 0.6666294597770793, "Yes": 0.333345729786539}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3770207097571698, "res": {"No": 0.622961088756495, "Yes": 0.3770207097571698}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48086123654928664, "res": {"No": 0.5191174608185776, "Yes": 0.48086123654928664}, "ground_truth": 1}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49397683776936396, "res": {"No": 0.5060027879131841, "Yes": 0.49397683776936396}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5905617537889011, "res": {"Yes": 0.5905617537889011, "No": 0.4094167471586009}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29621613657090073, "res": {"No": 0.7037606122980108, "Yes": 0.29621613657090073}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5505418851854323, "res": {"Yes": 0.5505418851854323, "No": 0.4494300475428368}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35330244647566983, "res": {"No": 0.6466722381073917, "Yes": 0.35330244647566983}, "ground_truth": 1}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4619084166085838, "res": {"No": 0.5380742054423956, "Yes": 0.4619084166085838}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45393622698660957, "res": {"No": 0.546035198062007, "Yes": 0.45393622698660957}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.47989012861512503, "res": {"No": 0.5200898156071524, "Yes": 0.47989012861512503}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30217921816176563, "res": {"No": 0.6978078993008304, "Yes": 0.30217921816176563}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3893809144332817, "res": {"No": 0.6106037543963805, "Yes": 0.3893809144332817}, "ground_truth": 1}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5038983589493173, "res": {"Yes": 0.5038983589493173, "No": 0.49608613899847526}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4455535844300427, "res": {"No": 0.5544274052827435, "Yes": 0.4455535844300427}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3440166574366928, "res": {"No": 0.6559692933836528, "Yes": 0.3440166574366928}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3712192390781758, "res": {"No": 0.6287694460092056, "Yes": 0.3712192390781758}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4395546164794122, "res": {"No": 0.5604345298403662, "Yes": 0.4395546164794122}, "ground_truth": 1}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5679183049038814, "res": {"Yes": 0.5679183049038814, "No": 0.4320725348876251}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2881228866774968, "res": {"No": 0.7118634497657068, "Yes": 0.2881228866774968}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4306472799142163, "res": {"No": 0.5693366088892085, "Yes": 0.4306472799142163}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4731914904631391, "res": {"No": 0.5267678183037484, "Yes": 0.4731914904631391}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34134533552434715, "res": {"No": 0.6586283263628009, "Yes": 0.34134533552434715}, "ground_truth": 1}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38262166418134025, "res": {"No": 0.6173525280339082, "Yes": 0.38262166418134025}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.25943048313264366, "res": {"No": 0.7405429689622378, "Yes": 0.25943048313264366}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4343993529019135, "res": {"No": 0.5655713046724556, "Yes": 0.4343993529019135}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3482443814920375, "res": {"No": 0.651738872225334, "Yes": 0.3482443814920375}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3268303336752907, "res": {"No": 0.6731496577416096, "Yes": 0.3268303336752907}, "ground_truth": 1}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36939706003725814, "res": {"No": 0.6305831202438847, "Yes": 0.36939706003725814}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4178773962031935, "res": {"No": 0.5821006255857949, "Yes": 0.4178773962031935}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36510678738912095, "res": {"No": 0.6348736095368523, "Yes": 0.36510678738912095}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3880867429661114, "res": {"No": 0.6118996380696047, "Yes": 0.3880867429661114}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5011123257165344, "res": {"Yes": 0.5011123257165344, "No": 0.49886932064638273}, "ground_truth": 1}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41857541591287295, "res": {"No": 0.5814071459483848, "Yes": 0.41857541591287295}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44921729278898825, "res": {"No": 0.5507642829500025, "Yes": 0.44921729278898825}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3381726212877424, "res": {"No": 0.661806785797532, "Yes": 0.3381726212877424}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41842697354065966, "res": {"No": 0.5815458280898815, "Yes": 0.41842697354065966}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35212420870051864, "res": {"No": 0.6478396285923241, "Yes": 0.35212420870051864}, "ground_truth": 1}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36025956263984293, "res": {"No": 0.639714842828106, "Yes": 0.36025956263984293}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.359431881858998, "res": {"No": 0.6405391342373382, "Yes": 0.359431881858998}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29412971543056593, "res": {"No": 0.7058390090495359, "Yes": 0.29412971543056593}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3706740255256361, "res": {"No": 0.629311043336009, "Yes": 0.3706740255256361}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5607170319991819, "res": {"Yes": 0.5607170319991819, "No": 0.4392606308197758}, "ground_truth": 1}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3997739166658396, "res": {"No": 0.6002026984907836, "Yes": 0.3997739166658396}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43556982177282044, "res": {"No": 0.5644094024198802, "Yes": 0.43556982177282044}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33381965063480823, "res": {"No": 0.6661615302076319, "Yes": 0.33381965063480823}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3595563027631049, "res": {"No": 0.6404187498157493, "Yes": 0.3595563027631049}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.532075171399087, "res": {"Yes": 0.532075171399087, "No": 0.46789182637774557}, "ground_truth": 1}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4637311559296696, "res": {"No": 0.5362216914340059, "Yes": 0.4637311559296696}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2861464603827408, "res": {"No": 0.713835239695482, "Yes": 0.2861464603827408}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.268998466537671, "res": {"No": 0.7309877918407972, "Yes": 0.268998466537671}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3628329657745159, "res": {"No": 0.6371501352821547, "Yes": 0.3628329657745159}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3610418136757782, "res": {"No": 0.6389407384405017, "Yes": 0.3610418136757782}, "ground_truth": 1}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3161985942433041, "res": {"No": 0.6837833577255914, "Yes": 0.3161985942433041}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4562385136335706, "res": {"No": 0.5437397974856808, "Yes": 0.4562385136335706}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35461290247701277, "res": {"No": 0.645364981135785, "Yes": 0.35461290247701277}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30105409884523227, "res": {"No": 0.6989284517490987, "Yes": 0.30105409884523227}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33589532509856246, "res": {"No": 0.6640821149888317, "Yes": 0.33589532509856246}, "ground_truth": 1}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2807076619835029, "res": {"No": 0.7192732152835852, "Yes": 0.2807076619835029}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3315253174083117, "res": {"No": 0.6684603607564998, "Yes": 0.3315253174083117}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.03310049639115295, "res": {"No": 0.9668900569278539, "Yes": 0.03310049639115295}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.514084772542741, "res": {"Yes": 0.514084772542741, "No": 0.48588529570789385}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49924960393824863, "res": {"No": 0.500718205605844, "Yes": 0.49924960393824863}, "ground_truth": 1}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.25374544487976847, "res": {"No": 0.7462390408584331, "Yes": 0.25374544487976847}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1963574414050827, "res": {"No": 0.8036225461511114, "Yes": 0.1963574414050827}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42080915988836826, "res": {"No": 0.5791762984107998, "Yes": 0.42080915988836826}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3696079424990578, "res": {"No": 0.6303717589890072, "Yes": 0.3696079424990578}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41739001772248074, "res": {"No": 0.5825928541383574, "Yes": 0.41739001772248074}, "ground_truth": 1}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4190084593015321, "res": {"No": 0.5809782838635152, "Yes": 0.4190084593015321}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3414996926403346, "res": {"No": 0.6584763456553713, "Yes": 0.3414996926403346}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30381549079858255, "res": {"No": 0.6961678596975673, "Yes": 0.30381549079858255}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32132934016156883, "res": {"No": 0.6786458929842566, "Yes": 0.32132934016156883}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41128827965762427, "res": {"No": 0.5886894758735118, "Yes": 0.41128827965762427}, "ground_truth": 1}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49767416661511477, "res": {"No": 0.5023022092338475, "Yes": 0.49767416661511477}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47753014030299945, "res": {"No": 0.5224484658288814, "Yes": 0.47753014030299945}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16430518439395725, "res": {"No": 0.8356848004055214, "Yes": 0.16430518439395725}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.10853774351450601, "res": {"No": 0.8914480642196371, "Yes": 0.10853774351450601}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.34078816900175707, "res": {"No": 0.6591894531394011, "Yes": 0.34078816900175707}, "ground_truth": 1}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3577019432882086, "res": {"No": 0.6422732453698617, "Yes": 0.3577019432882086}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33144092180651546, "res": {"No": 0.6685374921625492, "Yes": 0.33144092180651546}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2833534558615247, "res": {"No": 0.7166282590413273, "Yes": 0.2833534558615247}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31235647353412493, "res": {"No": 0.6876212234270732, "Yes": 0.31235647353412493}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42331883788516467, "res": {"No": 0.5766559581574514, "Yes": 0.42331883788516467}, "ground_truth": 1}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31152680284811907, "res": {"No": 0.6884591668359477, "Yes": 0.31152680284811907}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2815647049751061, "res": {"No": 0.7184153035155925, "Yes": 0.2815647049751061}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3262171549160761, "res": {"No": 0.6737577729361663, "Yes": 0.3262171549160761}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3509982644905018, "res": {"No": 0.6489760732895169, "Yes": 0.3509982644905018}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3303387618513963, "res": {"No": 0.6696410476087735, "Yes": 0.3303387618513963}, "ground_truth": 1}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3527124299807699, "res": {"No": 0.6472629324110513, "Yes": 0.3527124299807699}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2734061938630313, "res": {"No": 0.7265538835766515, "Yes": 0.2734061938630313}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35247619180131023, "res": {"No": 0.6475071130910377, "Yes": 0.35247619180131023}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5469650514049101, "res": {"Yes": 0.5469650514049101, "No": 0.45300734930470926}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45838941730796745, "res": {"No": 0.5415909322751222, "Yes": 0.45838941730796745}, "ground_truth": 1}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42640520472581084, "res": {"No": 0.5735731731813964, "Yes": 0.42640520472581084}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5295765256609671, "res": {"Yes": 0.5295765256609671, "No": 0.470403707894501}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.27403473789263416, "res": {"No": 0.7259539760771077, "Yes": 0.27403473789263416}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34054275361404457, "res": {"No": 0.6594338599676867, "Yes": 0.34054275361404457}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2835967341731372, "res": {"No": 0.716384911912041, "Yes": 0.2835967341731372}, "ground_truth": 1}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44829140519443, "res": {"No": 0.5516803370472686, "Yes": 0.44829140519443}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.28179123038450565, "res": {"No": 0.7181914945443334, "Yes": 0.28179123038450565}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3091820243360719, "res": {"No": 0.690805638822815, "Yes": 0.3091820243360719}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3686055414805827, "res": {"No": 0.6313839013099325, "Yes": 0.3686055414805827}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5158981070278706, "res": {"Yes": 0.5158981070278706, "No": 0.48408065673632533}, "ground_truth": 1}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43306430136815566, "res": {"No": 0.5669215923010477, "Yes": 0.43306430136815566}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4505244636853586, "res": {"No": 0.5494625831475465, "Yes": 0.4505244636853586}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5123899616492086, "res": {"Yes": 0.5123899616492086, "No": 0.4875820963271612}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4518186484064655, "res": {"No": 0.5481592511297443, "Yes": 0.4518186484064655}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3776245616815899, "res": {"No": 0.6223566427100514, "Yes": 0.3776245616815899}, "ground_truth": 1}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40099626737927496, "res": {"No": 0.5989854330162748, "Yes": 0.40099626737927496}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43455824179633323, "res": {"No": 0.5654232857562784, "Yes": 0.43455824179633323}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33544639895460204, "res": {"No": 0.6645342231820157, "Yes": 0.33544639895460204}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37009914861924814, "res": {"No": 0.6298865595603386, "Yes": 0.37009914861924814}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5186516841133795, "res": {"Yes": 0.5186516841133795, "No": 0.48132221860738145}, "ground_truth": 1}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5573691007934459, "res": {"Yes": 0.5573691007934459, "No": 0.4426156025292357}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4688469248775571, "res": {"No": 0.5311363877326903, "Yes": 0.4688469248775571}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2688705881723232, "res": {"No": 0.7311086487322997, "Yes": 0.2688705881723232}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3594154561973263, "res": {"No": 0.6405671968715166, "Yes": 0.3594154561973263}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3415556348715328, "res": {"No": 0.6584285288393994, "Yes": 0.3415556348715328}, "ground_truth": 1}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38292395516433997, "res": {"No": 0.6170583123877192, "Yes": 0.38292395516433997}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3257681327194731, "res": {"No": 0.6742178369939719, "Yes": 0.3257681327194731}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31634088399653404, "res": {"No": 0.6836423761329302, "Yes": 0.31634088399653404}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41383551122058565, "res": {"No": 0.5861495366276824, "Yes": 0.41383551122058565}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.551621310409186, "res": {"Yes": 0.551621310409186, "No": 0.44836059982952037}, "ground_truth": 1}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46191871728112616, "res": {"No": 0.5380523385510247, "Yes": 0.46191871728112616}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.6028751812200328, "res": {"Yes": 0.6028751812200328, "No": 0.3971058933432532}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28125436657050784, "res": {"No": 0.7187193005938657, "Yes": 0.28125436657050784}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3022748730208968, "res": {"No": 0.6977053639639965, "Yes": 0.3022748730208968}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3877909008348675, "res": {"No": 0.6121858782215169, "Yes": 0.3877909008348675}, "ground_truth": 1}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44021001901749474, "res": {"No": 0.559764869118444, "Yes": 0.44021001901749474}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2039385206339986, "res": {"No": 0.7960467131482759, "Yes": 0.2039385206339986}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3121174249374664, "res": {"No": 0.6878620086418035, "Yes": 0.3121174249374664}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3599665026700421, "res": {"No": 0.6400228372896333, "Yes": 0.3599665026700421}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4543939345162512, "res": {"No": 0.5455858541136788, "Yes": 0.4543939345162512}, "ground_truth": 1}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42592789823804555, "res": {"No": 0.5740534032323995, "Yes": 0.42592789823804555}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42519021307773475, "res": {"No": 0.5747865640790076, "Yes": 0.42519021307773475}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5155623150993099, "res": {"Yes": 0.5155623150993099, "No": 0.48441282093133703}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.25044325777101173, "res": {"No": 0.7495330997297542, "Yes": 0.25044325777101173}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.22124838379547232, "res": {"No": 0.7787304605358037, "Yes": 0.22124838379547232}, "ground_truth": 1}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2274859501683416, "res": {"No": 0.7724928082208847, "Yes": 0.2274859501683416}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1665846650507568, "res": {"No": 0.8333858156475288, "Yes": 0.1665846650507568}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28540196182904776, "res": {"No": 0.7145777715642421, "Yes": 0.28540196182904776}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4127632066353589, "res": {"No": 0.5872167951477549, "Yes": 0.4127632066353589}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46451378690287093, "res": {"No": 0.5354511587874751, "Yes": 0.46451378690287093}, "ground_truth": 1}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4545563002949686, "res": {"No": 0.5454196939699903, "Yes": 0.4545563002949686}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35490657257096525, "res": {"No": 0.6450807024085272, "Yes": 0.35490657257096525}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09954228004473184, "res": {"No": 0.9004484968045897, "Yes": 0.09954228004473184}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.416522353061597, "res": {"No": 0.5834579820765308, "Yes": 0.416522353061597}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41117269510529686, "res": {"No": 0.5888103285655161, "Yes": 0.41117269510529686}, "ground_truth": 1}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4649023013934708, "res": {"No": 0.5350788819808044, "Yes": 0.4649023013934708}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4834469939426658, "res": {"No": 0.5165324322713996, "Yes": 0.4834469939426658}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.13194158397393507, "res": {"No": 0.8680433267126137, "Yes": 0.13194158397393507}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.464470143237865, "res": {"No": 0.5355009580608726, "Yes": 0.464470143237865}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46686371254395237, "res": {"No": 0.5331059468454581, "Yes": 0.46686371254395237}, "ground_truth": 1}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5122143598392896, "res": {"Yes": 0.5122143598392896, "No": 0.4877683882674534}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5281240920856182, "res": {"Yes": 0.5281240920856182, "No": 0.4718496416370421}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36013425013737427, "res": {"No": 0.6398302705924745, "Yes": 0.36013425013737427}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4749791835562047, "res": {"No": 0.524979429008045, "Yes": 0.4749791835562047}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2841683030430182, "res": {"No": 0.7153344166775897, "Yes": 0.2841683030430182}, "ground_truth": 1}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5500004074157422, "res": {"Yes": 0.5500004074157422, "No": 0.4499550455435947}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33682162211360933, "res": {"No": 0.6631490981443164, "Yes": 0.33682162211360933}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4448605307434547, "res": {"No": 0.5551235207991703, "Yes": 0.4448605307434547}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3988693178202833, "res": {"No": 0.6011115646362885, "Yes": 0.3988693178202833}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5144579586855363, "res": {"Yes": 0.5144579586855363, "No": 0.4855272185001872}, "ground_truth": 1}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5374948502265916, "res": {"Yes": 0.5374948502265916, "No": 0.4624877772252287}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43615238055808064, "res": {"No": 0.5638356334563488, "Yes": 0.43615238055808064}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2464058267692246, "res": {"No": 0.7535725662339361, "Yes": 0.2464058267692246}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.1340714784474689, "res": {"No": 0.8659150585894636, "Yes": 0.1340714784474689}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2344886483867276, "res": {"No": 0.765490227600885, "Yes": 0.2344886483867276}, "ground_truth": 1}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2706887614086628, "res": {"No": 0.7292944638988412, "Yes": 0.2706887614086628}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.23169889885851652, "res": {"No": 0.7682679757019346, "Yes": 0.23169889885851652}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28366820955566885, "res": {"No": 0.7163085708398848, "Yes": 0.28366820955566885}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3137792078876537, "res": {"No": 0.6861952436608381, "Yes": 0.3137792078876537}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3662429382739575, "res": {"No": 0.6337265669418688, "Yes": 0.3662429382739575}, "ground_truth": 1}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3291792314861478, "res": {"No": 0.670801005552146, "Yes": 0.3291792314861478}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43526806339511914, "res": {"No": 0.5647051513881958, "Yes": 0.43526806339511914}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1443815255662844, "res": {"No": 0.8555990004663211, "Yes": 0.1443815255662844}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4784001943958296, "res": {"No": 0.5215607085015972, "Yes": 0.4784001943958296}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.24651103987281145, "res": {"No": 0.7534602169401585, "Yes": 0.24651103987281145}, "ground_truth": 1}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2783443396301057, "res": {"No": 0.7216343780115364, "Yes": 0.2783443396301057}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36785273409349195, "res": {"No": 0.632127540883372, "Yes": 0.36785273409349195}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4350886828722533, "res": {"No": 0.5648730631885558, "Yes": 0.4350886828722533}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4824184442263555, "res": {"No": 0.5175347532804473, "Yes": 0.4824184442263555}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5427517749168348, "res": {"Yes": 0.5427517749168348, "No": 0.4571840655141928}, "ground_truth": 1}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5458000003863712, "res": {"Yes": 0.5458000003863712, "No": 0.4541658224228696}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5331044541508966, "res": {"Yes": 0.5331044541508966, "No": 0.46686061724779915}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36320513287128503, "res": {"No": 0.6367752954742264, "Yes": 0.36320513287128503}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3963514980865188, "res": {"No": 0.6036242604321198, "Yes": 0.3963514980865188}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4687716575460147, "res": {"No": 0.5311969832281042, "Yes": 0.4687716575460147}, "ground_truth": 1}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42099666792438334, "res": {"No": 0.5789792150889698, "Yes": 0.42099666792438334}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1870328142966009, "res": {"No": 0.8129512616510678, "Yes": 0.1870328142966009}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5170686127654168, "res": {"Yes": 0.5170686127654168, "No": 0.4829059571407091}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.25454831962777813, "res": {"No": 0.7454385321657168, "Yes": 0.25454831962777813}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3532072796161408, "res": {"No": 0.6467693561693155, "Yes": 0.3532072796161408}, "ground_truth": 1}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40699263073730946, "res": {"No": 0.592980063665888, "Yes": 0.40699263073730946}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40349063538697666, "res": {"No": 0.5964922678855593, "Yes": 0.40349063538697666}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.45416309743611005, "res": {"No": 0.5458175372221115, "Yes": 0.45416309743611005}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3879828932083666, "res": {"No": 0.6119973172854042, "Yes": 0.3879828932083666}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.505520594753192, "res": {"Yes": 0.505520594753192, "No": 0.49446337271267504}, "ground_truth": 1}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5090589264883627, "res": {"Yes": 0.5090589264883627, "No": 0.4909101803495694}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.346650469223552, "res": {"No": 0.6533342450120828, "Yes": 0.346650469223552}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16559371345165866, "res": {"No": 0.8343797037134788, "Yes": 0.16559371345165866}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3980330010197837, "res": {"No": 0.6019417193926389, "Yes": 0.3980330010197837}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4858442692856842, "res": {"No": 0.51413253323656, "Yes": 0.4858442692856842}, "ground_truth": 1}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5422977359755027, "res": {"Yes": 0.5422977359755027, "No": 0.4576728506465667}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5048043250213193, "res": {"Yes": 0.5048043250213193, "No": 0.49517462541947155}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.376057696208896, "res": {"No": 0.623919665553947, "Yes": 0.376057696208896}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2805642289548959, "res": {"No": 0.7194187389742621, "Yes": 0.2805642289548959}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38273889828457913, "res": {"No": 0.6172402294769948, "Yes": 0.38273889828457913}, "ground_truth": 1}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4067593687330502, "res": {"No": 0.593215820094681, "Yes": 0.4067593687330502}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36942337312697415, "res": {"No": 0.6305594675146541, "Yes": 0.36942337312697415}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3126891540379852, "res": {"No": 0.6872896224356975, "Yes": 0.3126891540379852}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5245325992889954, "res": {"Yes": 0.5245325992889954, "No": 0.4754437645125626}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5660218767636623, "res": {"Yes": 0.5660218767636623, "No": 0.43396167727074325}, "ground_truth": 1}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4282142862006434, "res": {"No": 0.5717639772166526, "Yes": 0.4282142862006434}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31197850118660575, "res": {"No": 0.688004500256288, "Yes": 0.31197850118660575}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3589737905440764, "res": {"No": 0.641002757627332, "Yes": 0.3589737905440764}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30428086685023675, "res": {"No": 0.6957066133921866, "Yes": 0.30428086685023675}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5264739954425898, "res": {"Yes": 0.5264739954425898, "No": 0.4734961676750767}, "ground_truth": 1}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4199904735440251, "res": {"No": 0.5799854859377745, "Yes": 0.4199904735440251}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3457537716127571, "res": {"No": 0.6542284150599754, "Yes": 0.3457537716127571}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2703521528472889, "res": {"No": 0.7296320594903278, "Yes": 0.2703521528472889}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35703334435840245, "res": {"No": 0.6429439037478264, "Yes": 0.35703334435840245}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43298343849092, "res": {"No": 0.5669909140420365, "Yes": 0.43298343849092}, "ground_truth": 1}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.27249486747781937, "res": {"No": 0.7274725737207856, "Yes": 0.27249486747781937}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42367174919770867, "res": {"No": 0.5762996084270563, "Yes": 0.42367174919770867}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09093921160684214, "res": {"No": 0.9090458054864894, "Yes": 0.09093921160684214}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5969392377993272, "res": {"Yes": 0.5969392377993272, "No": 0.4030350693910831}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5594135366448476, "res": {"Yes": 0.5594135366448476, "No": 0.4405542701261978}, "ground_truth": 1}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4527485884266781, "res": {"No": 0.547234733349399, "Yes": 0.4527485884266781}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43182034728462493, "res": {"No": 0.5681608862094772, "Yes": 0.43182034728462493}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1342023021462053, "res": {"No": 0.8657834148717647, "Yes": 0.1342023021462053}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.14092895229908908, "res": {"No": 0.8590485065206762, "Yes": 0.14092895229908908}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.27730264167232294, "res": {"No": 0.7226772719433695, "Yes": 0.27730264167232294}, "ground_truth": 1}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2866627331690924, "res": {"No": 0.7132895357504436, "Yes": 0.2866627331690924}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.20748998614875183, "res": {"No": 0.7924858961858324, "Yes": 0.20748998614875183}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2294261242488191, "res": {"No": 0.7705526687280697, "Yes": 0.2294261242488191}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4020628336359906, "res": {"No": 0.5979189316643828, "Yes": 0.4020628336359906}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.29642077408801815, "res": {"No": 0.7035526676892732, "Yes": 0.29642077408801815}, "ground_truth": 1}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39725779553780793, "res": {"No": 0.6027216906551194, "Yes": 0.39725779553780793}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2583508329860574, "res": {"No": 0.7416406146362473, "Yes": 0.2583508329860574}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11890665564770832, "res": {"No": 0.8810715692203421, "Yes": 0.11890665564770832}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31255350764466133, "res": {"No": 0.6874200961346952, "Yes": 0.31255350764466133}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38203135216998924, "res": {"No": 0.617924467418318, "Yes": 0.38203135216998924}, "ground_truth": 1}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3858784096682323, "res": {"No": 0.6140931943646011, "Yes": 0.3858784096682323}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.28029482033129205, "res": {"No": 0.7196812308058389, "Yes": 0.28029482033129205}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28502964117463897, "res": {"No": 0.7149494057361526, "Yes": 0.28502964117463897}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3848480876857979, "res": {"No": 0.6151004691786616, "Yes": 0.3848480876857979}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.24310561510600806, "res": {"No": 0.7568619929687397, "Yes": 0.24310561510600806}, "ground_truth": 1}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31139271942302105, "res": {"No": 0.6885764696163551, "Yes": 0.31139271942302105}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.34047955474401037, "res": {"No": 0.6594863266126813, "Yes": 0.34047955474401037}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3669099239063169, "res": {"No": 0.6330705117953993, "Yes": 0.3669099239063169}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5149275179850789, "res": {"Yes": 0.5149275179850789, "No": 0.48505635447545187}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5115991145631094, "res": {"Yes": 0.5115991145631094, "No": 0.488381752127447}, "ground_truth": 1}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44887440139680934, "res": {"No": 0.5511001808120529, "Yes": 0.44887440139680934}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.452640466866209, "res": {"No": 0.5473409838139303, "Yes": 0.452640466866209}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06160305595586801, "res": {"No": 0.938380398570392, "Yes": 0.06160305595586801}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4499895404195578, "res": {"No": 0.5499775278746915, "Yes": 0.4499895404195578}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5121872183194827, "res": {"Yes": 0.5121872183194827, "No": 0.4877872164906229}, "ground_truth": 1}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48872647589599133, "res": {"No": 0.5112391113218014, "Yes": 0.48872647589599133}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3380752415961602, "res": {"No": 0.6618926475611564, "Yes": 0.3380752415961602}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.510620717593926, "res": {"Yes": 0.510620717593926, "No": 0.4893490502265512}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5541359639796788, "res": {"Yes": 0.5541359639796788, "No": 0.4458326774595204}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4378349045118627, "res": {"No": 0.5621332430003148, "Yes": 0.4378349045118627}, "ground_truth": 1}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4110039926730992, "res": {"No": 0.58896966453091, "Yes": 0.4110039926730992}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5053395294896741, "res": {"Yes": 0.5053395294896741, "No": 0.4946371082103624}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33689529305798976, "res": {"No": 0.6630854455174422, "Yes": 0.33689529305798976}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3192432907346851, "res": {"No": 0.680732128796377, "Yes": 0.3192432907346851}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.512652351974919, "res": {"Yes": 0.512652351974919, "No": 0.487298917772555}, "ground_truth": 1}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3710610405959655, "res": {"No": 0.6289068031202081, "Yes": 0.3710610405959655}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3925812094907635, "res": {"No": 0.6073879045818671, "Yes": 0.3925812094907635}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4182304845402418, "res": {"No": 0.5817385843152529, "Yes": 0.4182304845402418}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5391989536208895, "res": {"Yes": 0.5391989536208895, "No": 0.4607695926271983}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42403571237397, "res": {"No": 0.575936135673636, "Yes": 0.42403571237397}, "ground_truth": 1}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4388809443798594, "res": {"No": 0.5610857124578739, "Yes": 0.4388809443798594}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4169986477476435, "res": {"No": 0.5829765245968984, "Yes": 0.4169986477476435}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33728449700755725, "res": {"No": 0.6626787408507482, "Yes": 0.33728449700755725}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.049419494852180786, "res": {"No": 0.9505407498099848, "Yes": 0.049419494852180786}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.22503266714618445, "res": {"No": 0.7749517187162989, "Yes": 0.22503266714618445}, "ground_truth": 1}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4405788493345588, "res": {"No": 0.5593850072819837, "Yes": 0.4405788493345588}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.34390321945002233, "res": {"No": 0.6560707665616867, "Yes": 0.34390321945002233}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28334606043283744, "res": {"No": 0.7166245827477881, "Yes": 0.28334606043283744}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.20429004616598545, "res": {"No": 0.7956898537746335, "Yes": 0.20429004616598545}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44201403677464307, "res": {"No": 0.557962551979907, "Yes": 0.44201403677464307}, "ground_truth": 1}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41026079341412847, "res": {"No": 0.5897150233742244, "Yes": 0.41026079341412847}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3154202507346257, "res": {"No": 0.6845635822244601, "Yes": 0.3154202507346257}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4264624449380321, "res": {"No": 0.5735178031000143, "Yes": 0.4264624449380321}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40463243170869206, "res": {"No": 0.5953233057504018, "Yes": 0.40463243170869206}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3507438829731887, "res": {"No": 0.6492297427027334, "Yes": 0.3507438829731887}, "ground_truth": 1}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49842662100696034, "res": {"No": 0.5015459954665699, "Yes": 0.49842662100696034}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2998228136434011, "res": {"No": 0.7001590888324145, "Yes": 0.2998228136434011}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5089666221121241, "res": {"Yes": 0.5089666221121241, "No": 0.49102437933834625}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3761318997122929, "res": {"No": 0.6238547188972949, "Yes": 0.3761318997122929}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2690935473955434, "res": {"No": 0.7308947576354742, "Yes": 0.2690935473955434}, "ground_truth": 1}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4079381537557361, "res": {"No": 0.5920438357289622, "Yes": 0.4079381537557361}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3351085063664095, "res": {"No": 0.6648811341116273, "Yes": 0.3351085063664095}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3946384696114054, "res": {"No": 0.6053388470304569, "Yes": 0.3946384696114054}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4087628745077646, "res": {"No": 0.5911793493592956, "Yes": 0.4087628745077646}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5674393543037556, "res": {"Yes": 0.5674393543037556, "No": 0.432521622344387}, "ground_truth": 1}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47549046964743585, "res": {"No": 0.524485078788211, "Yes": 0.47549046964743585}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43246137627863035, "res": {"No": 0.5675096643953741, "Yes": 0.43246137627863035}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.13513028389925866, "res": {"No": 0.8648605317682584, "Yes": 0.13513028389925866}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.522996137615965, "res": {"Yes": 0.522996137615965, "No": 0.47698060943487497}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5707523934863702, "res": {"Yes": 0.5707523934863702, "No": 0.42922874372487213}, "ground_truth": 1}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5494322206841243, "res": {"Yes": 0.5494322206841243, "No": 0.45054769332556116}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4569665570899081, "res": {"No": 0.5430163494593055, "Yes": 0.4569665570899081}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.15447660778958808, "res": {"No": 0.84550388139799, "Yes": 0.15447660778958808}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47114362271194404, "res": {"No": 0.5288114435909582, "Yes": 0.47114362271194404}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3044311274029661, "res": {"No": 0.6955377997959189, "Yes": 0.3044311274029661}, "ground_truth": 1}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39957160231930455, "res": {"No": 0.6003980662485013, "Yes": 0.39957160231930455}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.09223936382488165, "res": {"No": 0.9077214868290727, "Yes": 0.09223936382488165}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.31184500295875656, "res": {"No": 0.6881378759749748, "Yes": 0.31184500295875656}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5520705129564926, "res": {"Yes": 0.5520705129564926, "No": 0.44790653301352695}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5336595981135811, "res": {"Yes": 0.5336595981135811, "No": 0.466319247007983}, "ground_truth": 1}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4354741898511685, "res": {"No": 0.5645018602520864, "Yes": 0.4354741898511685}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36065220719645913, "res": {"No": 0.6393344599719138, "Yes": 0.36065220719645913}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4073739008322025, "res": {"No": 0.5926102187892729, "Yes": 0.4073739008322025}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5256549185091913, "res": {"Yes": 0.5256549185091913, "No": 0.4743265834095077}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5384307115627271, "res": {"Yes": 0.5384307115627271, "No": 0.4615493208508951}, "ground_truth": 1}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4060139745089398, "res": {"No": 0.5939566452962357, "Yes": 0.4060139745089398}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5353772716258939, "res": {"Yes": 0.5353772716258939, "No": 0.4645985405291442}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.34436855992915655, "res": {"No": 0.6556053314346082, "Yes": 0.34436855992915655}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5104661760937463, "res": {"Yes": 0.5104661760937463, "No": 0.4894772275268903}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39115722102854017, "res": {"No": 0.6088224605285308, "Yes": 0.39115722102854017}, "ground_truth": 1}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5019705883254186, "res": {"Yes": 0.5019705883254186, "No": 0.49799014067242514}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4594357605030057, "res": {"No": 0.5405356211522414, "Yes": 0.4594357605030057}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43835574283797346, "res": {"No": 0.5616138134354116, "Yes": 0.43835574283797346}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47310981519765033, "res": {"No": 0.5268426773293372, "Yes": 0.47310981519765033}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5432568231789701, "res": {"Yes": 0.5432568231789701, "No": 0.4567052606622318}, "ground_truth": 1}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39839282618403155, "res": {"No": 0.6015752966132677, "Yes": 0.39839282618403155}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.49977913907389077, "res": {"No": 0.50019112680012, "Yes": 0.49977913907389077}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.12048545966653439, "res": {"No": 0.8795040587299442, "Yes": 0.12048545966653439}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2291953001054317, "res": {"No": 0.7707852334956818, "Yes": 0.2291953001054317}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30405307621590205, "res": {"No": 0.6959274239630407, "Yes": 0.30405307621590205}, "ground_truth": 1}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32609324855642674, "res": {"No": 0.6738733457806242, "Yes": 0.32609324855642674}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2400235345074617, "res": {"No": 0.7599541929138467, "Yes": 0.2400235345074617}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29238217908663255, "res": {"No": 0.7075976272064266, "Yes": 0.29238217908663255}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.17042311881377817, "res": {"No": 0.8295535537108615, "Yes": 0.17042311881377817}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32486960257824077, "res": {"No": 0.6751056579924638, "Yes": 0.32486960257824077}, "ground_truth": 1}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4089690206255012, "res": {"No": 0.5910083695507018, "Yes": 0.4089690206255012}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2496968492294722, "res": {"No": 0.7502784310085991, "Yes": 0.2496968492294722}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.25787404782171514, "res": {"No": 0.7421084110135543, "Yes": 0.25787404782171514}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2372045333761725, "res": {"No": 0.7627742675712209, "Yes": 0.2372045333761725}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30175450184318814, "res": {"No": 0.6982174791748694, "Yes": 0.30175450184318814}, "ground_truth": 1}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34849670626593804, "res": {"No": 0.651480020376195, "Yes": 0.34849670626593804}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.43528965322648694, "res": {"No": 0.5646793110716992, "Yes": 0.43528965322648694}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.47989012861512503, "res": {"No": 0.5200898156071524, "Yes": 0.47989012861512503}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4570981686985412, "res": {"No": 0.5428806394838189, "Yes": 0.4570981686985412}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4192124277770636, "res": {"No": 0.5807690531703776, "Yes": 0.4192124277770636}, "ground_truth": 1}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4495827088975406, "res": {"No": 0.5504033532468116, "Yes": 0.4495827088975406}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44462198043450124, "res": {"No": 0.5553605147460181, "Yes": 0.44462198043450124}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2790974988397722, "res": {"No": 0.7208719197412701, "Yes": 0.2790974988397722}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3888701716663311, "res": {"No": 0.6111139131217423, "Yes": 0.3888701716663311}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4262138940556369, "res": {"No": 0.573769804819041, "Yes": 0.4262138940556369}, "ground_truth": 1}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2337545139462022, "res": {"No": 0.7662234818733031, "Yes": 0.2337545139462022}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4020857116620982, "res": {"No": 0.5978947762274868, "Yes": 0.4020857116620982}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3995088745020029, "res": {"No": 0.6004801462741994, "Yes": 0.3995088745020029}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3211762953220912, "res": {"No": 0.6788080408558826, "Yes": 0.3211762953220912}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3869015001208648, "res": {"No": 0.6130706112736851, "Yes": 0.3869015001208648}, "ground_truth": 1}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38277658078400584, "res": {"No": 0.6172080227221022, "Yes": 0.38277658078400584}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4091043995041956, "res": {"No": 0.5908816118506826, "Yes": 0.4091043995041956}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40524259143568453, "res": {"No": 0.5947388127382145, "Yes": 0.40524259143568453}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5115182217910791, "res": {"Yes": 0.5115182217910791, "No": 0.4884600948434061}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4837164940416175, "res": {"No": 0.5162706321922114, "Yes": 0.4837164940416175}, "ground_truth": 1}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4232960554783898, "res": {"No": 0.5766898146226016, "Yes": 0.4232960554783898}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5009447016185654, "res": {"Yes": 0.5009447016185654, "No": 0.49903547179278845}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4870113257547743, "res": {"No": 0.5129619233783902, "Yes": 0.4870113257547743}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4665782193379883, "res": {"No": 0.5333991824233613, "Yes": 0.4665782193379883}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41148599231137284, "res": {"No": 0.5884918506742676, "Yes": 0.41148599231137284}, "ground_truth": 1}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4709750305807369, "res": {"No": 0.5289984379308164, "Yes": 0.4709750305807369}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3524427786421362, "res": {"No": 0.6475408231888115, "Yes": 0.3524427786421362}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.45514548676901306, "res": {"No": 0.5448323752371212, "Yes": 0.45514548676901306}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5313235081676215, "res": {"Yes": 0.5313235081676215, "No": 0.4686597751050591}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5245294730840198, "res": {"Yes": 0.5245294730840198, "No": 0.47545454294487716}, "ground_truth": 1}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41697474190043576, "res": {"No": 0.5830120406085836, "Yes": 0.41697474190043576}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4635439205487718, "res": {"No": 0.5364419092681362, "Yes": 0.4635439205487718}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.20968101065311331, "res": {"No": 0.7903066849951587, "Yes": 0.20968101065311331}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.32034004716750164, "res": {"No": 0.6796399678969193, "Yes": 0.32034004716750164}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4569497501690195, "res": {"No": 0.543030549522509, "Yes": 0.4569497501690195}, "ground_truth": 1}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4910271781852852, "res": {"No": 0.5089452663207078, "Yes": 0.4910271781852852}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33037539845154756, "res": {"No": 0.669611791630505, "Yes": 0.33037539845154756}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.32845839517224684, "res": {"No": 0.6715213292058925, "Yes": 0.32845839517224684}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.17814367345896964, "res": {"No": 0.8218384960950981, "Yes": 0.17814367345896964}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4887377313963378, "res": {"No": 0.5112225832285104, "Yes": 0.4887377313963378}, "ground_truth": 1}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40921344035571156, "res": {"No": 0.5907561987221283, "Yes": 0.40921344035571156}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31999826662497144, "res": {"No": 0.679954129299367, "Yes": 0.31999826662497144}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.17608083493177246, "res": {"No": 0.8239000293807629, "Yes": 0.17608083493177246}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3657327147276683, "res": {"No": 0.634239421117092, "Yes": 0.3657327147276683}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46457847030571325, "res": {"No": 0.5353888680231859, "Yes": 0.46457847030571325}, "ground_truth": 1}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46444701320067444, "res": {"No": 0.5355273856852493, "Yes": 0.46444701320067444}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5017313158330189, "res": {"Yes": 0.5017313158330189, "No": 0.4982352619277883}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35489493182629417, "res": {"No": 0.6450816829319401, "Yes": 0.35489493182629417}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28691206611863784, "res": {"No": 0.7130675161453238, "Yes": 0.28691206611863784}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28439971021163546, "res": {"No": 0.715581070732289, "Yes": 0.28439971021163546}, "ground_truth": 1}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.1680211860650004, "res": {"No": 0.8319639570106224, "Yes": 0.1680211860650004}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3069425139210835, "res": {"No": 0.6930394881210198, "Yes": 0.3069425139210835}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44768791841227595, "res": {"No": 0.5522864699617495, "Yes": 0.44768791841227595}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3676226406300165, "res": {"No": 0.6323529787904335, "Yes": 0.3676226406300165}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45938262982964395, "res": {"No": 0.5405700219337928, "Yes": 0.45938262982964395}, "ground_truth": 1}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5399660563557308, "res": {"Yes": 0.5399660563557308, "No": 0.4599953951925866}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5645258746720045, "res": {"Yes": 0.5645258746720045, "No": 0.43544703888187153}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44025625230135923, "res": {"No": 0.5597254406697496, "Yes": 0.44025625230135923}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47697040692875536, "res": {"No": 0.5229739630498342, "Yes": 0.47697040692875536}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.33966182116177696, "res": {"No": 0.660311081103378, "Yes": 0.33966182116177696}, "ground_truth": 1}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.563950481554974, "res": {"Yes": 0.563950481554974, "No": 0.4360099695300122}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44863468570043674, "res": {"No": 0.5513209020462082, "Yes": 0.44863468570043674}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.06839599559500983, "res": {"No": 0.9315929803943162, "Yes": 0.06839599559500983}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41842515338728364, "res": {"No": 0.5815566100494828, "Yes": 0.41842515338728364}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47245924015100005, "res": {"No": 0.5275169402128398, "Yes": 0.47245924015100005}, "ground_truth": 1}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4574637190656138, "res": {"No": 0.5425143566753445, "Yes": 0.4574637190656138}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46212698793909907, "res": {"No": 0.5378546621600083, "Yes": 0.46212698793909907}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.12687710984552705, "res": {"No": 0.87310322363554, "Yes": 0.12687710984552705}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3908679890177924, "res": {"No": 0.6091049466062761, "Yes": 0.3908679890177924}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5067646724745614, "res": {"Yes": 0.5067646724745614, "No": 0.4932056115095916}, "ground_truth": 1}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4268807836221722, "res": {"No": 0.5731001074129597, "Yes": 0.4268807836221722}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40053459366967503, "res": {"No": 0.5994414943614731, "Yes": 0.40053459366967503}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.36435230811568176, "res": {"No": 0.6355865790152198, "Yes": 0.36435230811568176}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41190390381726727, "res": {"No": 0.5880712881981894, "Yes": 0.41190390381726727}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3375434284698558, "res": {"No": 0.6624314885359163, "Yes": 0.3375434284698558}, "ground_truth": 1}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3540420102028592, "res": {"No": 0.645932964957234, "Yes": 0.3540420102028592}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36088317066302344, "res": {"No": 0.6390596372051068, "Yes": 0.36088317066302344}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4622281401203588, "res": {"No": 0.5377538776401241, "Yes": 0.4622281401203588}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41276037921707737, "res": {"No": 0.5872217277895512, "Yes": 0.41276037921707737}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5031838040621059, "res": {"Yes": 0.5031838040621059, "No": 0.4968019913586639}, "ground_truth": 1}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5295841516178434, "res": {"Yes": 0.5295841516178434, "No": 0.4703979219644774}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46886545407416275, "res": {"No": 0.5311219091521047, "Yes": 0.46886545407416275}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.49239314814008633, "res": {"No": 0.5075865706764803, "Yes": 0.49239314814008633}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.48753912273506494, "res": {"No": 0.5124356893697267, "Yes": 0.48753912273506494}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4253749770982646, "res": {"No": 0.5746043856150067, "Yes": 0.4253749770982646}, "ground_truth": 1}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3758352486876384, "res": {"No": 0.6241431561610427, "Yes": 0.3758352486876384}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.459636876569603, "res": {"No": 0.5403392207912475, "Yes": 0.459636876569603}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2822880148473407, "res": {"No": 0.7176991994391902, "Yes": 0.2822880148473407}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28080334345790403, "res": {"No": 0.7191712295726158, "Yes": 0.28080334345790403}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5176834623761034, "res": {"Yes": 0.5176834623761034, "No": 0.48228377179787335}, "ground_truth": 1}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4225411544273569, "res": {"No": 0.5774395369439232, "Yes": 0.4225411544273569}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32075169211672333, "res": {"No": 0.6792308662559492, "Yes": 0.32075169211672333}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3824613792860789, "res": {"No": 0.6175166174449936, "Yes": 0.3824613792860789}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4758007047142204, "res": {"No": 0.5241660266992111, "Yes": 0.4758007047142204}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5078705432509876, "res": {"Yes": 0.5078705432509876, "No": 0.49210232818483907}, "ground_truth": 1}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5257199092440861, "res": {"Yes": 0.5257199092440861, "No": 0.47425313488206716}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4564777137333814, "res": {"No": 0.543495935950939, "Yes": 0.4564777137333814}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29748594464597694, "res": {"No": 0.70249689339327, "Yes": 0.29748594464597694}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5418639412283695, "res": {"Yes": 0.5418639412283695, "No": 0.458108162539493}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5870188601164, "res": {"Yes": 0.5870188601164, "No": 0.4129492396463585}, "ground_truth": 1}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42262757292942793, "res": {"No": 0.5773483722126243, "Yes": 0.42262757292942793}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4783161325159818, "res": {"No": 0.5216511341038877, "Yes": 0.4783161325159818}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.1515055531877288, "res": {"No": 0.8484474746460419, "Yes": 0.1515055531877288}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.27649932479328465, "res": {"No": 0.7234785651183462, "Yes": 0.27649932479328465}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38751958094284517, "res": {"No": 0.6124475333922608, "Yes": 0.38751958094284517}, "ground_truth": 1}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38252412722210943, "res": {"No": 0.6174377162127237, "Yes": 0.38252412722210943}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5285284730025357, "res": {"Yes": 0.5285284730025357, "No": 0.471427100848508}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39383773287924373, "res": {"No": 0.6061408101019476, "Yes": 0.39383773287924373}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.48409658325192173, "res": {"No": 0.5158796691590122, "Yes": 0.48409658325192173}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.613401439836728, "res": {"Yes": 0.613401439836728, "No": 0.38657977060486104}, "ground_truth": 1}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.550020867811459, "res": {"Yes": 0.550020867811459, "No": 0.44996320330252026}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.48096789859079075, "res": {"No": 0.5190137826516369, "Yes": 0.48096789859079075}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5020282781002199, "res": {"Yes": 0.5020282781002199, "No": 0.49794572193298237}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.471764763521527, "res": {"No": 0.5282144090272433, "Yes": 0.471764763521527}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5295176929771823, "res": {"Yes": 0.5295176929771823, "No": 0.4704618063399311}, "ground_truth": 1}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.534723092602225, "res": {"Yes": 0.534723092602225, "No": 0.46525385389562296}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.46482692359671984, "res": {"No": 0.5351482326977385, "Yes": 0.46482692359671984}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4750528822935652, "res": {"No": 0.5249237106484758, "Yes": 0.4750528822935652}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.38549548427125796, "res": {"No": 0.6144780627268641, "Yes": 0.38549548427125796}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4080693269571309, "res": {"No": 0.5919078588774346, "Yes": 0.4080693269571309}, "ground_truth": 1}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35959229615045046, "res": {"No": 0.6403888237467984, "Yes": 0.35959229615045046}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.28918108367000256, "res": {"No": 0.7108025892370422, "Yes": 0.28918108367000256}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2504363456324852, "res": {"No": 0.7495381965721616, "Yes": 0.2504363456324852}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40801481253669375, "res": {"No": 0.5919507323162959, "Yes": 0.40801481253669375}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5215687927552317, "res": {"Yes": 0.5215687927552317, "No": 0.4783893683219279}, "ground_truth": 1}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5722537540578426, "res": {"Yes": 0.5722537540578426, "No": 0.42771219978155545}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3868104651714486, "res": {"No": 0.6131613892285165, "Yes": 0.3868104651714486}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.15996866926767633, "res": {"No": 0.8400180745960695, "Yes": 0.15996866926767633}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33503566169562804, "res": {"No": 0.6649345262103605, "Yes": 0.33503566169562804}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3277784356233544, "res": {"No": 0.6721973540183865, "Yes": 0.3277784356233544}, "ground_truth": 1}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3694401231624322, "res": {"No": 0.6305471906413374, "Yes": 0.3694401231624322}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38100248940705383, "res": {"No": 0.6189437393550857, "Yes": 0.38100248940705383}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22532008223181615, "res": {"No": 0.7746667516309067, "Yes": 0.22532008223181615}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4266086314616084, "res": {"No": 0.5733687078544302, "Yes": 0.4266086314616084}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2610732133697725, "res": {"No": 0.7389082647812912, "Yes": 0.2610732133697725}, "ground_truth": 1}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.344543474690988, "res": {"No": 0.6554343718582679, "Yes": 0.344543474690988}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.21677925527408212, "res": {"No": 0.7832053475244589, "Yes": 0.21677925527408212}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33383036641758096, "res": {"No": 0.6661511381688188, "Yes": 0.33383036641758096}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3822373875531962, "res": {"No": 0.6177438188944875, "Yes": 0.3822373875531962}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5787648761390891, "res": {"Yes": 0.5787648761390891, "No": 0.42121663719041147}, "ground_truth": 1}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4572463153622748, "res": {"No": 0.5427262933191876, "Yes": 0.4572463153622748}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5338404593090794, "res": {"Yes": 0.5338404593090794, "No": 0.4661428718003139}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4232473580685792, "res": {"No": 0.576730167020643, "Yes": 0.4232473580685792}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.21947154289430898, "res": {"No": 0.7805051739724498, "Yes": 0.21947154289430898}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4343081386156393, "res": {"No": 0.5656693832398519, "Yes": 0.4343081386156393}, "ground_truth": 1}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4020286195446784, "res": {"No": 0.597941025177083, "Yes": 0.4020286195446784}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3861624280387953, "res": {"No": 0.6138142015299808, "Yes": 0.3861624280387953}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3412338362176348, "res": {"No": 0.6587511061255533, "Yes": 0.3412338362176348}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.46553698184330583, "res": {"No": 0.5344487698376391, "Yes": 0.46553698184330583}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5185098781996558, "res": {"Yes": 0.5185098781996558, "No": 0.4814752263760501}, "ground_truth": 1}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.503037851611974, "res": {"Yes": 0.503037851611974, "No": 0.4969431230388618}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.37543018894088315, "res": {"No": 0.6245559261146453, "Yes": 0.37543018894088315}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4521558760871843, "res": {"No": 0.547813488181554, "Yes": 0.4521558760871843}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3991009047317703, "res": {"No": 0.6008711861190832, "Yes": 0.3991009047317703}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2790521770861867, "res": {"No": 0.7209156924131617, "Yes": 0.2790521770861867}, "ground_truth": 1}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3942216911614431, "res": {"No": 0.605759279728272, "Yes": 0.3942216911614431}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45004246230125683, "res": {"No": 0.549934169355609, "Yes": 0.45004246230125683}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23750675827917198, "res": {"No": 0.7624637367440229, "Yes": 0.23750675827917198}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.37294735824822717, "res": {"No": 0.6270269480040449, "Yes": 0.37294735824822717}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39640450968067914, "res": {"No": 0.6035691279101125, "Yes": 0.39640450968067914}, "ground_truth": 1}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4301941932634772, "res": {"No": 0.5697832007877114, "Yes": 0.4301941932634772}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4610738286004603, "res": {"No": 0.5389090827201436, "Yes": 0.4610738286004603}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38329817802443433, "res": {"No": 0.6166807574006734, "Yes": 0.38329817802443433}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3810697156064447, "res": {"No": 0.6189030141969086, "Yes": 0.3810697156064447}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45439786958476297, "res": {"No": 0.5455676864072307, "Yes": 0.45439786958476297}, "ground_truth": 1}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36509072304389406, "res": {"No": 0.6348734952596129, "Yes": 0.36509072304389406}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3419571986150054, "res": {"No": 0.6579967084664201, "Yes": 0.3419571986150054}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40874024193399167, "res": {"No": 0.591241331851348, "Yes": 0.40874024193399167}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36213456851640435, "res": {"No": 0.6378320530656586, "Yes": 0.36213456851640435}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46553617646502393, "res": {"No": 0.5344305240680939, "Yes": 0.46553617646502393}, "ground_truth": 1}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4554238295889679, "res": {"No": 0.5445534597878743, "Yes": 0.4554238295889679}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.49559804695682363, "res": {"No": 0.5043627135534277, "Yes": 0.49559804695682363}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38189430390875895, "res": {"No": 0.6180893825652735, "Yes": 0.38189430390875895}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.45897770641642677, "res": {"No": 0.5410039277476476, "Yes": 0.45897770641642677}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4621093627518935, "res": {"No": 0.537873127027508, "Yes": 0.4621093627518935}, "ground_truth": 1}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.463950042304331, "res": {"No": 0.5360247197704833, "Yes": 0.463950042304331}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4210906027590385, "res": {"No": 0.5788919463396982, "Yes": 0.4210906027590385}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3978938968772545, "res": {"No": 0.602086112429678, "Yes": 0.3978938968772545}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47627402862890666, "res": {"No": 0.5237056181248939, "Yes": 0.47627402862890666}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3850529198440184, "res": {"No": 0.6149338548803153, "Yes": 0.3850529198440184}, "ground_truth": 1}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5133526207801876, "res": {"Yes": 0.5133526207801876, "No": 0.4866196827027455}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.49649245102160505, "res": {"No": 0.5034787360446678, "Yes": 0.49649245102160505}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3404367591537383, "res": {"No": 0.659548123376618, "Yes": 0.3404367591537383}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41568001891339984, "res": {"No": 0.5843017700938071, "Yes": 0.41568001891339984}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5977866866235395, "res": {"Yes": 0.5977866866235395, "No": 0.4021932437577971}, "ground_truth": 1}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5105735384193317, "res": {"Yes": 0.5105735384193317, "No": 0.48940652275373053}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5056102315007046, "res": {"Yes": 0.5056102315007046, "No": 0.494376572342521}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5545627575314428, "res": {"Yes": 0.5545627575314428, "No": 0.445413355144581}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5926828772559695, "res": {"Yes": 0.5926828772559695, "No": 0.4072878888407684}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5324138918075674, "res": {"Yes": 0.5324138918075674, "No": 0.46755608478236743}, "ground_truth": 1}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5851410205684486, "res": {"Yes": 0.5851410205684486, "No": 0.4148337464422443}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.6115818081840535, "res": {"Yes": 0.6115818081840535, "No": 0.3884011584299608}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3495901537248443, "res": {"No": 0.6503892494636974, "Yes": 0.3495901537248443}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.531345186608992, "res": {"Yes": 0.531345186608992, "No": 0.46863598185227373}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4914781480983791, "res": {"No": 0.5085050156111919, "Yes": 0.4914781480983791}, "ground_truth": 1}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48275236435767843, "res": {"No": 0.517229601281133, "Yes": 0.48275236435767843}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4010021219675171, "res": {"No": 0.598982761547201, "Yes": 0.4010021219675171}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.436975984838317, "res": {"No": 0.563002899931766, "Yes": 0.436975984838317}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4800070440901824, "res": {"No": 0.5199754708300222, "Yes": 0.4800070440901824}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3316029365700853, "res": {"No": 0.6683822557267207, "Yes": 0.3316029365700853}, "ground_truth": 1}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47994579909887003, "res": {"No": 0.5200370603720479, "Yes": 0.47994579909887003}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4528096683310038, "res": {"No": 0.5471749457486893, "Yes": 0.4528096683310038}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4173882646880877, "res": {"No": 0.5825715025015243, "Yes": 0.4173882646880877}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3324093458900644, "res": {"No": 0.6675712518900111, "Yes": 0.3324093458900644}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2916797865054225, "res": {"No": 0.7082876234315724, "Yes": 0.2916797865054225}, "ground_truth": 1}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.30557100656384006, "res": {"No": 0.6944090033960033, "Yes": 0.30557100656384006}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3580006775447955, "res": {"No": 0.6419777328037447, "Yes": 0.3580006775447955}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.27184625811658075, "res": {"No": 0.7281369211261521, "Yes": 0.27184625811658075}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3888608155625552, "res": {"No": 0.6111073131271207, "Yes": 0.3888608155625552}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4979201779726554, "res": {"No": 0.5020005970241398, "Yes": 0.4979201779726554}, "ground_truth": 1}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37252568146828474, "res": {"No": 0.6274419325669447, "Yes": 0.37252568146828474}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44159831486611667, "res": {"No": 0.5583710016639554, "Yes": 0.44159831486611667}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4855330351511065, "res": {"No": 0.5144395928642432, "Yes": 0.4855330351511065}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4559688286627512, "res": {"No": 0.544001214278005, "Yes": 0.4559688286627512}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4115983227425724, "res": {"No": 0.5883786004311072, "Yes": 0.4115983227425724}, "ground_truth": 1}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.36554068233801296, "res": {"No": 0.634437436199374, "Yes": 0.36554068233801296}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5418533153806687, "res": {"Yes": 0.5418533153806687, "No": 0.45811490594127685}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.44832877184028636, "res": {"No": 0.5516484287793698, "Yes": 0.44832877184028636}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5187460613730147, "res": {"Yes": 0.5187460613730147, "No": 0.48123262400289}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4123572680225901, "res": {"No": 0.5876236423750054, "Yes": 0.4123572680225901}, "ground_truth": 1}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4831554908071646, "res": {"No": 0.5168298339233993, "Yes": 0.4831554908071646}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44504743811140496, "res": {"No": 0.5549310817352028, "Yes": 0.44504743811140496}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4729273009892743, "res": {"No": 0.5270504207341368, "Yes": 0.4729273009892743}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5876947079989743, "res": {"Yes": 0.5876947079989743, "No": 0.412276585846585}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49677369432314333, "res": {"No": 0.5032002383135165, "Yes": 0.49677369432314333}, "ground_truth": 1}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40999949130666685, "res": {"No": 0.5899816171149872, "Yes": 0.40999949130666685}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40223994110412586, "res": {"No": 0.5977283454900186, "Yes": 0.40223994110412586}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.19212132605245716, "res": {"No": 0.8078573120929153, "Yes": 0.19212132605245716}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33177186495964345, "res": {"No": 0.6681885064303955, "Yes": 0.33177186495964345}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35704394840619924, "res": {"No": 0.6429396860496518, "Yes": 0.35704394840619924}, "ground_truth": 1}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44860886303111136, "res": {"No": 0.5513680254574963, "Yes": 0.44860886303111136}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35441703296233024, "res": {"No": 0.6455536876254404, "Yes": 0.35441703296233024}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3634117463753302, "res": {"No": 0.6365714708590803, "Yes": 0.3634117463753302}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34226253164936793, "res": {"No": 0.6577243081696329, "Yes": 0.34226253164936793}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4536195308799779, "res": {"No": 0.5463603168055424, "Yes": 0.4536195308799779}, "ground_truth": 1}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3414866817499041, "res": {"No": 0.6584965480195587, "Yes": 0.3414866817499041}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3576326273685289, "res": {"No": 0.6423324978097239, "Yes": 0.3576326273685289}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2920541350179766, "res": {"No": 0.7079235509519379, "Yes": 0.2920541350179766}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3470796453041521, "res": {"No": 0.6529041137984568, "Yes": 0.3470796453041521}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5663240432652497, "res": {"Yes": 0.5663240432652497, "No": 0.43365039976116193}, "ground_truth": 1}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49467306963531127, "res": {"No": 0.505302054899742, "Yes": 0.49467306963531127}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5084676927128131, "res": {"Yes": 0.5084676927128131, "No": 0.49151332444820917}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2256674553139137, "res": {"No": 0.7743161471026802, "Yes": 0.2256674553139137}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3550079128630726, "res": {"No": 0.6449560203596599, "Yes": 0.3550079128630726}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4019107218411094, "res": {"No": 0.5980701644847849, "Yes": 0.4019107218411094}, "ground_truth": 1}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4700858694539362, "res": {"No": 0.5298924063136755, "Yes": 0.4700858694539362}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.29734077718739554, "res": {"No": 0.7026282725959071, "Yes": 0.29734077718739554}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11602943152664247, "res": {"No": 0.8839458092295955, "Yes": 0.11602943152664247}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3695470915610163, "res": {"No": 0.6304266730646806, "Yes": 0.3695470915610163}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44846823274134456, "res": {"No": 0.5515064748071599, "Yes": 0.44846823274134456}, "ground_truth": 1}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4419131983560968, "res": {"No": 0.5580598021676012, "Yes": 0.4419131983560968}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2961175426309081, "res": {"No": 0.7038632210371599, "Yes": 0.2961175426309081}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2899328310204149, "res": {"No": 0.7100412159484706, "Yes": 0.2899328310204149}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3330486178863158, "res": {"No": 0.6669302374939001, "Yes": 0.3330486178863158}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32948735569315396, "res": {"No": 0.6704870100334333, "Yes": 0.32948735569315396}, "ground_truth": 1}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39553966738590945, "res": {"No": 0.6044399815262452, "Yes": 0.39553966738590945}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3502730800516059, "res": {"No": 0.6497063158265433, "Yes": 0.3502730800516059}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3334903331845276, "res": {"No": 0.6664976934896796, "Yes": 0.3334903331845276}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.25546828658429993, "res": {"No": 0.7445140167277807, "Yes": 0.25546828658429993}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2929752044490044, "res": {"No": 0.707013937288867, "Yes": 0.2929752044490044}, "ground_truth": 1}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33166034200670286, "res": {"No": 0.6683232936449761, "Yes": 0.33166034200670286}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.309817024287351, "res": {"No": 0.6901690785719162, "Yes": 0.309817024287351}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33408056584110196, "res": {"No": 0.6658968102571163, "Yes": 0.33408056584110196}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4239776446594378, "res": {"No": 0.5759978447769097, "Yes": 0.4239776446594378}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5622396480285397, "res": {"Yes": 0.5622396480285397, "No": 0.43770482995216903}, "ground_truth": 1}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6197766548226045, "res": {"Yes": 0.6197766548226045, "No": 0.38016411420802854}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5060040326815733, "res": {"Yes": 0.5060040326815733, "No": 0.49395636285378397}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.264733637499691, "res": {"No": 0.7352485306396592, "Yes": 0.264733637499691}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5793073229078122, "res": {"Yes": 0.5793073229078122, "No": 0.42067000027352514}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5392956999835317, "res": {"Yes": 0.5392956999835317, "No": 0.4606817598845429}, "ground_truth": 1}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41549878037908644, "res": {"No": 0.5844827563762846, "Yes": 0.41549878037908644}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45561387205077347, "res": {"No": 0.5443683213084362, "Yes": 0.45561387205077347}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21647734046421185, "res": {"No": 0.7835002328453138, "Yes": 0.21647734046421185}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5244073558589459, "res": {"Yes": 0.5244073558589459, "No": 0.4755460767542406}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5235541058253885, "res": {"Yes": 0.5235541058253885, "No": 0.4764087665557312}, "ground_truth": 1}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5650257068619672, "res": {"Yes": 0.5650257068619672, "No": 0.43493489315643835}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5232446459534089, "res": {"Yes": 0.5232446459534089, "No": 0.476729635821078}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23566921229369778, "res": {"No": 0.7643128225648202, "Yes": 0.23566921229369778}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36029724776099253, "res": {"No": 0.6396853910347277, "Yes": 0.36029724776099253}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38723522270003286, "res": {"No": 0.6127456412636693, "Yes": 0.38723522270003286}, "ground_truth": 1}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3968689950628294, "res": {"No": 0.603109944311529, "Yes": 0.3968689950628294}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.48611123629768915, "res": {"No": 0.5138697758520792, "Yes": 0.48611123629768915}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4136360078412055, "res": {"No": 0.5863443113039092, "Yes": 0.4136360078412055}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4592723912276462, "res": {"No": 0.5407104261991937, "Yes": 0.4592723912276462}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4473845917762724, "res": {"No": 0.5526064193112177, "Yes": 0.4473845917762724}, "ground_truth": 1}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3865434492760178, "res": {"No": 0.6134409318926604, "Yes": 0.3865434492760178}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3860213500641056, "res": {"No": 0.6139657709668156, "Yes": 0.3860213500641056}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42261112458437644, "res": {"No": 0.5773611490734768, "Yes": 0.42261112458437644}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.21588716328314095, "res": {"No": 0.7840752508584551, "Yes": 0.21588716328314095}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5285845528485045, "res": {"Yes": 0.5285845528485045, "No": 0.4713908023592524}, "ground_truth": 1}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4722790313279011, "res": {"No": 0.5276980678671979, "Yes": 0.4722790313279011}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.35990480969872307, "res": {"No": 0.6400681973154255, "Yes": 0.35990480969872307}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.22254425461660005, "res": {"No": 0.77744797527274, "Yes": 0.22254425461660005}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4281365466722487, "res": {"No": 0.5718488504425014, "Yes": 0.4281365466722487}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3797527104776733, "res": {"No": 0.6202323395073731, "Yes": 0.3797527104776733}, "ground_truth": 1}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4473894101342726, "res": {"No": 0.5525944555117482, "Yes": 0.4473894101342726}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36194543882278846, "res": {"No": 0.638037767934465, "Yes": 0.36194543882278846}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.43449979764326907, "res": {"No": 0.5654755615554582, "Yes": 0.43449979764326907}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3056736651054817, "res": {"No": 0.6943152020196912, "Yes": 0.3056736651054817}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32961330982880194, "res": {"No": 0.6703673320794458, "Yes": 0.32961330982880194}, "ground_truth": 1}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3832257261852242, "res": {"No": 0.6167547018563673, "Yes": 0.3832257261852242}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.253746409114291, "res": {"No": 0.7462390408584331, "Yes": 0.253746409114291}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35531267589297777, "res": {"No": 0.644669343094213, "Yes": 0.35531267589297777}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.47753913705559187, "res": {"No": 0.522431397716312, "Yes": 0.47753913705559187}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4582651647801264, "res": {"No": 0.5417147812002757, "Yes": 0.4582651647801264}, "ground_truth": 1}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5225415378633141, "res": {"Yes": 0.5225415378633141, "No": 0.4774421730286057}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.589293569085672, "res": {"Yes": 0.589293569085672, "No": 0.4106766392350692}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40936798852764844, "res": {"No": 0.5906096916319872, "Yes": 0.40936798852764844}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4846496888758919, "res": {"No": 0.5153355175797796, "Yes": 0.4846496888758919}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5066875943625556, "res": {"Yes": 0.5066875943625556, "No": 0.4932989989682635}, "ground_truth": 1}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5454705840014208, "res": {"Yes": 0.5454705840014208, "No": 0.4545050746865426}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5506912344201975, "res": {"Yes": 0.5506912344201975, "No": 0.44929567007454}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3372122246841045, "res": {"No": 0.6627642252947311, "Yes": 0.3372122246841045}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3894113262702657, "res": {"No": 0.6105582600055549, "Yes": 0.3894113262702657}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6769318336413235, "res": {"Yes": 0.6769318336413235, "No": 0.3230346777717082}, "ground_truth": 1}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4574091331769494, "res": {"No": 0.5425644764666117, "Yes": 0.4574091331769494}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3550490961695503, "res": {"No": 0.6449201166573953, "Yes": 0.3550490961695503}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.21174300081470995, "res": {"No": 0.7882422362147636, "Yes": 0.21174300081470995}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35922731552398796, "res": {"No": 0.6407487144888852, "Yes": 0.35922731552398796}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2795282830260663, "res": {"No": 0.7204562118930712, "Yes": 0.2795282830260663}, "ground_truth": 1}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37280858798335403, "res": {"No": 0.6271631718633786, "Yes": 0.37280858798335403}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3152003848589347, "res": {"No": 0.6847821982764597, "Yes": 0.3152003848589347}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.502940251616705, "res": {"Yes": 0.502940251616705, "No": 0.4970353145386827}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5256290726922729, "res": {"Yes": 0.5256290726922729, "No": 0.4743340303953259}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5999468866105933, "res": {"Yes": 0.5999468866105933, "No": 0.40002371345254406}, "ground_truth": 1}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5793315152867502, "res": {"Yes": 0.5793315152867502, "No": 0.4206290079793804}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4902351034435855, "res": {"No": 0.5097434444163341, "Yes": 0.4902351034435855}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.26562145108868845, "res": {"No": 0.7343701396650184, "Yes": 0.26562145108868845}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3403853911225973, "res": {"No": 0.6595931984355667, "Yes": 0.3403853911225973}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4101652137847222, "res": {"No": 0.5898228390322374, "Yes": 0.4101652137847222}, "ground_truth": 1}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3610655349021705, "res": {"No": 0.6389205546213794, "Yes": 0.3610655349021705}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2916335589228941, "res": {"No": 0.7083482271132217, "Yes": 0.2916335589228941}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5314995648080496, "res": {"Yes": 0.5314995648080496, "No": 0.46848434172843706}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4444076976153505, "res": {"No": 0.5555790365529404, "Yes": 0.4444076976153505}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4771253663924168, "res": {"No": 0.5228486212137105, "Yes": 0.4771253663924168}, "ground_truth": 1}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4392332876965708, "res": {"No": 0.560753816242991, "Yes": 0.4392332876965708}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5100048211873084, "res": {"Yes": 0.5100048211873084, "No": 0.48997691560373946}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.29901435401637755, "res": {"No": 0.7009666485786346, "Yes": 0.29901435401637755}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.36365233187004065, "res": {"No": 0.6363322922448079, "Yes": 0.36365233187004065}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4203355492042399, "res": {"No": 0.5796433374633586, "Yes": 0.4203355492042399}, "ground_truth": 1}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37473733066595, "res": {"No": 0.6252490428832941, "Yes": 0.37473733066595}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31730767253145686, "res": {"No": 0.6826779727380415, "Yes": 0.31730767253145686}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.34910079066741273, "res": {"No": 0.6508813574520079, "Yes": 0.34910079066741273}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3181069919870634, "res": {"No": 0.6818751721017272, "Yes": 0.3181069919870634}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41768659516907997, "res": {"No": 0.58229109095913, "Yes": 0.41768659516907997}, "ground_truth": 1}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47929596086799237, "res": {"No": 0.5206838318980975, "Yes": 0.47929596086799237}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44747115564672935, "res": {"No": 0.5525078154938788, "Yes": 0.44747115564672935}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33522584839401176, "res": {"No": 0.6647385988767969, "Yes": 0.33522584839401176}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3052506306445028, "res": {"No": 0.6947084415477975, "Yes": 0.3052506306445028}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49764276436595273, "res": {"No": 0.5020741505196396, "Yes": 0.49764276436595273}, "ground_truth": 1}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5420132127890771, "res": {"Yes": 0.5420132127890771, "No": 0.45752172914295247}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42595952029900713, "res": {"No": 0.5739909209201192, "Yes": 0.42595952029900713}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.6266864143889262, "res": {"Yes": 0.6266864143889262, "No": 0.3732677456834133}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4329007292354703, "res": {"No": 0.5670675196891186, "Yes": 0.4329007292354703}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6307274520120381, "res": {"Yes": 0.6307274520120381, "No": 0.36923783620831524}, "ground_truth": 1}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5190507066055042, "res": {"Yes": 0.5190507066055042, "No": 0.48090304444255505}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38257978853188235, "res": {"No": 0.6173803280441581, "Yes": 0.38257978853188235}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.16169039731007373, "res": {"No": 0.838287373065816, "Yes": 0.16169039731007373}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.17857869617308356, "res": {"No": 0.821400885025857, "Yes": 0.17857869617308356}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3549914763772146, "res": {"No": 0.6449852181795893, "Yes": 0.3549914763772146}, "ground_truth": 1}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.35188991280952997, "res": {"No": 0.6480868654458571, "Yes": 0.35188991280952997}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42562570742269, "res": {"No": 0.5743468137394503, "Yes": 0.42562570742269}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40338011023293296, "res": {"No": 0.5965962455492418, "Yes": 0.40338011023293296}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3599514923798474, "res": {"No": 0.6400303384012428, "Yes": 0.3599514923798474}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.28341769937169525, "res": {"No": 0.7165678784880889, "Yes": 0.28341769937169525}, "ground_truth": 1}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39907388651509695, "res": {"No": 0.6009038744007213, "Yes": 0.39907388651509695}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33775986316741474, "res": {"No": 0.6622268553165858, "Yes": 0.33775986316741474}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.40373671863822813, "res": {"No": 0.5962370834447358, "Yes": 0.40373671863822813}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4234938867750495, "res": {"No": 0.5764808255097784, "Yes": 0.4234938867750495}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5160897471701924, "res": {"Yes": 0.5160897471701924, "No": 0.483883772774078}, "ground_truth": 1}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38673502901509466, "res": {"No": 0.6132406270626788, "Yes": 0.38673502901509466}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1836165248897146, "res": {"No": 0.8163665348550251, "Yes": 0.1836165248897146}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5264503941424012, "res": {"Yes": 0.5264503941424012, "No": 0.4735228073188363}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5887772795693024, "res": {"Yes": 0.5887772795693024, "No": 0.4112045087674128}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6019121948753974, "res": {"Yes": 0.6019121948753974, "No": 0.3980597855615781}, "ground_truth": 1}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.44820448991767126, "res": {"No": 0.5517722546479442, "Yes": 0.44820448991767126}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3787584924877635, "res": {"No": 0.6212334759459271, "Yes": 0.3787584924877635}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.38716317267353517, "res": {"No": 0.6128186174873703, "Yes": 0.38716317267353517}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2640169957677092, "res": {"No": 0.7359707742066782, "Yes": 0.2640169957677092}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38879350958088205, "res": {"No": 0.6111915539649073, "Yes": 0.38879350958088205}, "ground_truth": 1}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4265219704553637, "res": {"No": 0.5734589287844184, "Yes": 0.4265219704553637}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2760115683035674, "res": {"No": 0.7239759538951372, "Yes": 0.2760115683035674}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39386538125853865, "res": {"No": 0.6061209047645914, "Yes": 0.39386538125853865}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.430245587331511, "res": {"No": 0.5697349649837832, "Yes": 0.430245587331511}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44247332448544363, "res": {"No": 0.557509910011762, "Yes": 0.44247332448544363}, "ground_truth": 1}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3992945156174098, "res": {"No": 0.6006881886277242, "Yes": 0.3992945156174098}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3890836577508408, "res": {"No": 0.6108979836218351, "Yes": 0.3890836577508408}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24497157391635882, "res": {"No": 0.755010366019661, "Yes": 0.24497157391635882}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.442483589985651, "res": {"No": 0.5574771070937281, "Yes": 0.442483589985651}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3518655628700812, "res": {"No": 0.6481136708729444, "Yes": 0.3518655628700812}, "ground_truth": 1}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2543057997728344, "res": {"No": 0.7456564556966412, "Yes": 0.2543057997728344}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3141858780204372, "res": {"No": 0.6857844798038046, "Yes": 0.3141858780204372}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5300052112166833, "res": {"Yes": 0.5300052112166833, "No": 0.4699698665766988}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3490456370997338, "res": {"No": 0.6509300126519428, "Yes": 0.3490456370997338}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5534752739546116, "res": {"Yes": 0.5534752739546116, "No": 0.4465011513756674}, "ground_truth": 1}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4947283277032951, "res": {"No": 0.5052486472951808, "Yes": 0.4947283277032951}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.6003871991418498, "res": {"Yes": 0.6003871991418498, "No": 0.3995796337892272}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4476339125994948, "res": {"No": 0.5523346480117793, "Yes": 0.4476339125994948}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4378179037125931, "res": {"No": 0.5621435975900163, "Yes": 0.4378179037125931}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3940472117839701, "res": {"No": 0.6059220933533931, "Yes": 0.3940472117839701}, "ground_truth": 1}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2989937526371114, "res": {"No": 0.7009785440835941, "Yes": 0.2989937526371114}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5285491124423888, "res": {"Yes": 0.5285491124423888, "No": 0.47140684406120464}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.35119867149058953, "res": {"No": 0.648777147188674, "Yes": 0.35119867149058953}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3110645599757249, "res": {"No": 0.6889156770506155, "Yes": 0.3110645599757249}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3659967226972353, "res": {"No": 0.6339761902885688, "Yes": 0.3659967226972353}, "ground_truth": 1}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.33681606460269403, "res": {"No": 0.6631558158787059, "Yes": 0.33681606460269403}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3897162222382597, "res": {"No": 0.6102454656818042, "Yes": 0.3897162222382597}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.10334421182358919, "res": {"No": 0.8966433617549375, "Yes": 0.10334421182358919}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.24196323157725877, "res": {"No": 0.7580141166229911, "Yes": 0.24196323157725877}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3577013351954219, "res": {"No": 0.6422832713334744, "Yes": 0.3577013351954219}, "ground_truth": 1}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4474001028689525, "res": {"No": 0.552581320497651, "Yes": 0.4474001028689525}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.48591668491185697, "res": {"No": 0.5140640553438619, "Yes": 0.48591668491185697}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2557029652895473, "res": {"No": 0.7442818166875048, "Yes": 0.2557029652895473}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3694307284195548, "res": {"No": 0.6305576199781209, "Yes": 0.3694307284195548}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3715963330850683, "res": {"No": 0.6283897650816119, "Yes": 0.3715963330850683}, "ground_truth": 1}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41220908520484684, "res": {"No": 0.5877688621181967, "Yes": 0.41220908520484684}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.31575414195721424, "res": {"No": 0.6842198601775462, "Yes": 0.31575414195721424}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4296058579742413, "res": {"No": 0.5703665526574094, "Yes": 0.4296058579742413}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3723005205142842, "res": {"No": 0.6276813473296812, "Yes": 0.3723005205142842}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49494287357130257, "res": {"No": 0.5050263867912675, "Yes": 0.49494287357130257}, "ground_truth": 1}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4436299544385313, "res": {"No": 0.5563559453984055, "Yes": 0.4436299544385313}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4227453037823421, "res": {"No": 0.5772404932022981, "Yes": 0.4227453037823421}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4689992076419866, "res": {"No": 0.5309816583768276, "Yes": 0.4689992076419866}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5494239572856662, "res": {"Yes": 0.5494239572856662, "No": 0.45055640700221106}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5374745870527013, "res": {"Yes": 0.5374745870527013, "No": 0.46250825663741507}, "ground_truth": 1}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3952110709355142, "res": {"No": 0.6047712449334469, "Yes": 0.3952110709355142}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4274542131859179, "res": {"No": 0.5725273910897558, "Yes": 0.4274542131859179}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.33252803721168755, "res": {"No": 0.6674416688612653, "Yes": 0.33252803721168755}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.23843837208755247, "res": {"No": 0.7615415840970037, "Yes": 0.23843837208755247}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3482574060754635, "res": {"No": 0.6517209496527843, "Yes": 0.3482574060754635}, "ground_truth": 1}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43944823515981196, "res": {"No": 0.560515882816422, "Yes": 0.43944823515981196}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.445506964142184, "res": {"No": 0.5544540184369077, "Yes": 0.445506964142184}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.49988170427605855, "res": {"No": 0.5000933990022601, "Yes": 0.49988170427605855}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.552776438247198, "res": {"Yes": 0.552776438247198, "No": 0.4472073775824203}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5541963403818992, "res": {"Yes": 0.5541963403818992, "No": 0.44578176627483845}, "ground_truth": 1}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5949218658253804, "res": {"Yes": 0.5949218658253804, "No": 0.40504966085406013}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47066981995424545, "res": {"No": 0.5293104485476882, "Yes": 0.47066981995424545}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2917217622467828, "res": {"No": 0.7082504322249297, "Yes": 0.2917217622467828}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4374582265126451, "res": {"No": 0.5625137335509045, "Yes": 0.4374582265126451}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5832732535215422, "res": {"Yes": 0.5832732535215422, "No": 0.41668999121694217}, "ground_truth": 1}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5453931108619419, "res": {"Yes": 0.5453931108619419, "No": 0.45457575572096437}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.49638075278664784, "res": {"No": 0.5035758009959198, "Yes": 0.49638075278664784}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2914034050933035, "res": {"No": 0.7085700314956126, "Yes": 0.2914034050933035}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.398180025734568, "res": {"No": 0.6018023860130605, "Yes": 0.398180025734568}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39052687111327183, "res": {"No": 0.6094503643782526, "Yes": 0.39052687111327183}, "ground_truth": 1}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29176499861568356, "res": {"No": 0.708220247234755, "Yes": 0.29176499861568356}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40290037340178325, "res": {"No": 0.5970733194388788, "Yes": 0.40290037340178325}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39267958696282856, "res": {"No": 0.6073014794323403, "Yes": 0.39267958696282856}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3723296951261181, "res": {"No": 0.627655104521159, "Yes": 0.3723296951261181}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35630267452979253, "res": {"No": 0.6436787903852311, "Yes": 0.35630267452979253}, "ground_truth": 1}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4788915099605758, "res": {"No": 0.521082604443956, "Yes": 0.4788915099605758}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.1789724134182786, "res": {"No": 0.821007134134887, "Yes": 0.1789724134182786}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.09151034069738355, "res": {"No": 0.9084658420832136, "Yes": 0.09151034069738355}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.30730866900785814, "res": {"No": 0.6926708199022686, "Yes": 0.30730866900785814}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.502685039007803, "res": {"Yes": 0.502685039007803, "No": 0.4972760621760652}, "ground_truth": 1}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.21928364599615327, "res": {"No": 0.7806986149269234, "Yes": 0.21928364599615327}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39039989242263995, "res": {"No": 0.6095625684233354, "Yes": 0.39039989242263995}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.30277022333060705, "res": {"No": 0.6972138153893261, "Yes": 0.30277022333060705}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.42379575911731526, "res": {"No": 0.576186872199881, "Yes": 0.42379575911731526}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39585919465909764, "res": {"No": 0.6041188070357457, "Yes": 0.39585919465909764}, "ground_truth": 1}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47874738530792604, "res": {"No": 0.5212348870283766, "Yes": 0.47874738530792604}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.36629432576294535, "res": {"No": 0.6336898118668937, "Yes": 0.36629432576294535}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4003749483727741, "res": {"No": 0.5996096013239868, "Yes": 0.4003749483727741}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.41603249865724373, "res": {"No": 0.5839534548088234, "Yes": 0.41603249865724373}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4696097539131867, "res": {"No": 0.5303724199756862, "Yes": 0.4696097539131867}, "ground_truth": 1}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4133971193290662, "res": {"No": 0.5865838232256512, "Yes": 0.4133971193290662}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.389904477244905, "res": {"No": 0.6100734373271866, "Yes": 0.389904477244905}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.42557631216954844, "res": {"No": 0.5743984843029214, "Yes": 0.42557631216954844}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.352466111126379, "res": {"No": 0.647510836267642, "Yes": 0.352466111126379}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5596969737577568, "res": {"Yes": 0.5596969737577568, "No": 0.44028005319769103}, "ground_truth": 1}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.597666543574309, "res": {"Yes": 0.597666543574309, "No": 0.40231188815126523}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5984210714243915, "res": {"Yes": 0.5984210714243915, "No": 0.4015456312993702}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3036280033700611, "res": {"No": 0.6963476404102099, "Yes": 0.3036280033700611}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.24197535423883093, "res": {"No": 0.757984122597832, "Yes": 0.24197535423883093}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2823468780347103, "res": {"No": 0.7176256892750791, "Yes": 0.2823468780347103}, "ground_truth": 1}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.32732196174034867, "res": {"No": 0.6726491955451072, "Yes": 0.32732196174034867}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2986308750078021, "res": {"No": 0.7013252987592655, "Yes": 0.2986308750078021}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3477051946657893, "res": {"No": 0.6522731454792723, "Yes": 0.3477051946657893}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.522058097186728, "res": {"Yes": 0.522058097186728, "No": 0.4779038391763367}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3945155588724629, "res": {"No": 0.6054585948970732, "Yes": 0.3945155588724629}, "ground_truth": 1}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3286345946532454, "res": {"No": 0.6713460850093322, "Yes": 0.3286345946532454}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3490057085628055, "res": {"No": 0.650975969933109, "Yes": 0.3490057085628055}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.25263647025853275, "res": {"No": 0.7473295697857011, "Yes": 0.25263647025853275}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3245284413472505, "res": {"No": 0.6754419781105923, "Yes": 0.3245284413472505}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38514734639618636, "res": {"No": 0.6148172375845663, "Yes": 0.38514734639618636}, "ground_truth": 1}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31835148749993153, "res": {"No": 0.681626517118929, "Yes": 0.31835148749993153}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.27300804919528926, "res": {"No": 0.7269641317579959, "Yes": 0.27300804919528926}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.5093687831094469, "res": {"Yes": 0.5093687831094469, "No": 0.49060412947497045}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31810285662304755, "res": {"No": 0.6818611051620277, "Yes": 0.31810285662304755}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4387345800521293, "res": {"No": 0.5612282126561798, "Yes": 0.4387345800521293}, "ground_truth": 1}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4577001058763391, "res": {"No": 0.5422597005462194, "Yes": 0.4577001058763391}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3056625387865721, "res": {"No": 0.6943176598998568, "Yes": 0.3056625387865721}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.46511057777427844, "res": {"No": 0.5348674605862649, "Yes": 0.46511057777427844}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.6091169278184108, "res": {"Yes": 0.6091169278184108, "No": 0.3908607580268825}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3582763725677693, "res": {"No": 0.6417005224888235, "Yes": 0.3582763725677693}, "ground_truth": 1}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4945213074351718, "res": {"No": 0.5054467688615952, "Yes": 0.4945213074351718}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.45078309388190396, "res": {"No": 0.5491942362247512, "Yes": 0.45078309388190396}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3878426355916705, "res": {"No": 0.6121367828828329, "Yes": 0.3878426355916705}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4401320163116585, "res": {"No": 0.5598418476781362, "Yes": 0.4401320163116585}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.45258030589374, "res": {"No": 0.5473945546974646, "Yes": 0.45258030589374}, "ground_truth": 1}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4378532808284293, "res": {"No": 0.5621257498141278, "Yes": 0.4378532808284293}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5350461361552598, "res": {"Yes": 0.5350461361552598, "No": 0.46492087926061776}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28958271674482944, "res": {"No": 0.7103886957108283, "Yes": 0.28958271674482944}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.14909098825257666, "res": {"No": 0.8508980226184432, "Yes": 0.14909098825257666}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2741398229546613, "res": {"No": 0.7258349511621746, "Yes": 0.2741398229546613}, "ground_truth": 1}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2471891613577548, "res": {"No": 0.7527898152201127, "Yes": 0.2471891613577548}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.15107495116267472, "res": {"No": 0.8488971351695548, "Yes": 0.15107495116267472}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3144000686734832, "res": {"No": 0.685576944824193, "Yes": 0.3144000686734832}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3951999261404562, "res": {"No": 0.6047772563894985, "Yes": 0.3951999261404562}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3705113131108591, "res": {"No": 0.6294681012090588, "Yes": 0.3705113131108591}, "ground_truth": 1}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4672848266769068, "res": {"No": 0.5326899347601185, "Yes": 0.4672848266769068}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3915636096473105, "res": {"No": 0.608401568820437, "Yes": 0.3915636096473105}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23237140586985078, "res": {"No": 0.7676155017223063, "Yes": 0.23237140586985078}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.468762938474272, "res": {"No": 0.531224584940439, "Yes": 0.468762938474272}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43704238401367096, "res": {"No": 0.5629381020271748, "Yes": 0.43704238401367096}, "ground_truth": 1}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5154684912961869, "res": {"Yes": 0.5154684912961869, "No": 0.4845039184829083}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4075502463402773, "res": {"No": 0.5924315559674553, "Yes": 0.4075502463402773}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.466646452059716, "res": {"No": 0.5333337383587533, "Yes": 0.466646452059716}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5189412763011118, "res": {"Yes": 0.5189412763011118, "No": 0.4810337235516831}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4655860427154245, "res": {"No": 0.5343919609528511, "Yes": 0.4655860427154245}, "ground_truth": 1}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4736561038061189, "res": {"No": 0.5263244664780486, "Yes": 0.4736561038061189}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.47483775581497756, "res": {"No": 0.5251445610041133, "Yes": 0.47483775581497756}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.11862393499570462, "res": {"No": 0.8813688224139766, "Yes": 0.11862393499570462}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40890999838619674, "res": {"No": 0.5910740933355706, "Yes": 0.40890999838619674}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41703799759654164, "res": {"No": 0.5829403578552164, "Yes": 0.41703799759654164}, "ground_truth": 1}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43084443228911223, "res": {"No": 0.5691367954240691, "Yes": 0.43084443228911223}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.4747594615245853, "res": {"No": 0.525225229403858, "Yes": 0.4747594615245853}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3787617270991011, "res": {"No": 0.6212198338085866, "Yes": 0.3787617270991011}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4067062900988867, "res": {"No": 0.5932651421082514, "Yes": 0.4067062900988867}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5350252002095665, "res": {"Yes": 0.5350252002095665, "No": 0.46495056075697305}, "ground_truth": 1}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3658672559607612, "res": {"No": 0.634112205449374, "Yes": 0.3658672559607612}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40604885260734297, "res": {"No": 0.5939306602614308, "Yes": 0.40604885260734297}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4262376731920991, "res": {"No": 0.5737427235233772, "Yes": 0.4262376731920991}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.347669313341208, "res": {"No": 0.6523128244615358, "Yes": 0.347669313341208}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43303228177749137, "res": {"No": 0.5669413271851593, "Yes": 0.43303228177749137}, "ground_truth": 1}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3962347857941295, "res": {"No": 0.603743542333806, "Yes": 0.3962347857941295}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.40515571673738116, "res": {"No": 0.594820023919867, "Yes": 0.40515571673738116}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.41022822000031306, "res": {"No": 0.5897491924525473, "Yes": 0.41022822000031306}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.24454398218685383, "res": {"No": 0.7554397869865911, "Yes": 0.24454398218685383}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4523786810347885, "res": {"No": 0.5475933661771335, "Yes": 0.4523786810347885}, "ground_truth": 1}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5685282015919695, "res": {"Yes": 0.5685282015919695, "No": 0.43144316158369767}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5445116179076098, "res": {"Yes": 0.5445116179076098, "No": 0.45545659395778876}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3959107230452691, "res": {"No": 0.6040603311655024, "Yes": 0.3959107230452691}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3153460095461335, "res": {"No": 0.684637457466485, "Yes": 0.3153460095461335}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31263803354051445, "res": {"No": 0.687342394558804, "Yes": 0.31263803354051445}, "ground_truth": 1}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.393031113906943, "res": {"No": 0.6069449402663709, "Yes": 0.393031113906943}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42538956771020664, "res": {"No": 0.5745889864238223, "Yes": 0.42538956771020664}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.28770328222248986, "res": {"No": 0.712277540108958, "Yes": 0.28770328222248986}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.2426483022156554, "res": {"No": 0.7573339072477219, "Yes": 0.2426483022156554}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.25271578529439054, "res": {"No": 0.7472605197236148, "Yes": 0.25271578529439054}, "ground_truth": 1}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37816391113895, "res": {"No": 0.6218147252424395, "Yes": 0.37816391113895}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32764866105990703, "res": {"No": 0.6723260585216587, "Yes": 0.32764866105990703}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.49972616529280894, "res": {"No": 0.5002382670331226, "Yes": 0.49972616529280894}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5723733446966853, "res": {"Yes": 0.5723733446966853, "No": 0.42758704384412904}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6418675211081935, "res": {"Yes": 0.6418675211081935, "No": 0.3580935549662314}, "ground_truth": 1}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4695568271805652, "res": {"No": 0.5304067202691741, "Yes": 0.4695568271805652}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.524540073931792, "res": {"Yes": 0.524540073931792, "No": 0.4754278706931816}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3819935482179489, "res": {"No": 0.6179870972368033, "Yes": 0.3819935482179489}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3361495596663412, "res": {"No": 0.6638259283327523, "Yes": 0.3361495596663412}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46557792296564393, "res": {"No": 0.5343928159806726, "Yes": 0.46557792296564393}, "ground_truth": 1}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5959751544202911, "res": {"Yes": 0.5959751544202911, "No": 0.4039975602598455}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.5094643292784465, "res": {"Yes": 0.5094643292784465, "No": 0.49050433602774596}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3987419751903796, "res": {"No": 0.6012280533098087, "Yes": 0.3987419751903796}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3421904587497453, "res": {"No": 0.6577827429928071, "Yes": 0.3421904587497453}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46027210121506756, "res": {"No": 0.5396862644630541, "Yes": 0.46027210121506756}, "ground_truth": 1}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24353400385191992, "res": {"No": 0.7564421343159182, "Yes": 0.24353400385191992}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.6042170447402786, "res": {"Yes": 0.6042170447402786, "No": 0.3957506649480255}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4892925826043995, "res": {"No": 0.5106903710100608, "Yes": 0.4892925826043995}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.4680309559779318, "res": {"No": 0.5319573457587812, "Yes": 0.4680309559779318}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.49006556044545635, "res": {"No": 0.5099181277368586, "Yes": 0.49006556044545635}, "ground_truth": 1}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4608855875392857, "res": {"No": 0.5390971193400619, "Yes": 0.4608855875392857}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.44672999719092316, "res": {"No": 0.553252179837498, "Yes": 0.44672999719092316}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2793344694312841, "res": {"No": 0.7206490750124663, "Yes": 0.2793344694312841}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.28940016344875386, "res": {"No": 0.710583013590864, "Yes": 0.28940016344875386}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4194665265254685, "res": {"No": 0.5805117714079018, "Yes": 0.4194665265254685}, "ground_truth": 1}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5034573134802173, "res": {"Yes": 0.5034573134802173, "No": 0.496521496679567}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.472414972794156, "res": {"No": 0.5275530447077097, "Yes": 0.472414972794156}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.337003251603921, "res": {"No": 0.6629741161872009, "Yes": 0.337003251603921}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.34425483377671995, "res": {"No": 0.6557139545674158, "Yes": 0.34425483377671995}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5098201053995003, "res": {"Yes": 0.5098201053995003, "No": 0.49015270184874477}, "ground_truth": 1}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43659626047863853, "res": {"No": 0.5633694214722145, "Yes": 0.43659626047863853}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.28559550141180157, "res": {"No": 0.7143779749730631, "Yes": 0.28559550141180157}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39657923175051996, "res": {"No": 0.6033919158561349, "Yes": 0.39657923175051996}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3873162446559801, "res": {"No": 0.6126564564991516, "Yes": 0.3873162446559801}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.341769447282761, "res": {"No": 0.6582114544073283, "Yes": 0.341769447282761}, "ground_truth": 1}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37819720993728384, "res": {"No": 0.6217769449275113, "Yes": 0.37819720993728384}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.33458952398168246, "res": {"No": 0.6653840874139423, "Yes": 0.33458952398168246}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3314457277347246, "res": {"No": 0.6685357873941178, "Yes": 0.3314457277347246}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.5078161532284431, "res": {"Yes": 0.5078161532284431, "No": 0.49214629949225136}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3782572194670426, "res": {"No": 0.6217190788400871, "Yes": 0.3782572194670426}, "ground_truth": 1}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48608239617359034, "res": {"No": 0.5138961637425724, "Yes": 0.48608239617359034}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.487189487597534, "res": {"No": 0.5127855973813085, "Yes": 0.487189487597534}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.4219032612370974, "res": {"No": 0.5780620460290015, "Yes": 0.4219032612370974}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.17590179898047334, "res": {"No": 0.8240786785118922, "Yes": 0.17590179898047334}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5168244072386332, "res": {"Yes": 0.5168244072386332, "No": 0.483149475558749}, "ground_truth": 1}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39600654107417366, "res": {"No": 0.6039645710317058, "Yes": 0.39600654107417366}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.42103496352295516, "res": {"No": 0.5789402163625333, "Yes": 0.42103496352295516}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.08156459467139622, "res": {"No": 0.9184264993603181, "Yes": 0.08156459467139622}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.3784080379841605, "res": {"No": 0.6215684918502614, "Yes": 0.3784080379841605}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4281578084610823, "res": {"No": 0.5718205274703633, "Yes": 0.4281578084610823}, "ground_truth": 1}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38059675125797093, "res": {"No": 0.6193858725409297, "Yes": 0.38059675125797093}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3896337670097049, "res": {"No": 0.6103375281539396, "Yes": 0.3896337670097049}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.34226112837586486, "res": {"No": 0.6577178098855698, "Yes": 0.34226112837586486}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.40607561210844567, "res": {"No": 0.5938893538204578, "Yes": 0.40607561210844567}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5333168852789012, "res": {"Yes": 0.5333168852789012, "No": 0.4666539511284555}, "ground_truth": 1}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46796086891021516, "res": {"No": 0.5320136138629282, "Yes": 0.46796086891021516}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38627068453881425, "res": {"No": 0.6137146734890123, "Yes": 0.38627068453881425}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.196882219420931, "res": {"No": 0.803098995955146, "Yes": 0.196882219420931}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.26197486891327876, "res": {"No": 0.7380019294161906, "Yes": 0.26197486891327876}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3251058361410415, "res": {"No": 0.6748753444857705, "Yes": 0.3251058361410415}, "ground_truth": 1}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.31526280071410256, "res": {"No": 0.6847114296932975, "Yes": 0.31526280071410256}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3144342143750192, "res": {"No": 0.6855402057401524, "Yes": 0.3144342143750192}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39029418644500724, "res": {"No": 0.6096846760344596, "Yes": 0.39029418644500724}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.39444818136921006, "res": {"No": 0.605535311360932, "Yes": 0.39444818136921006}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41174671009950975, "res": {"No": 0.5882349359190329, "Yes": 0.41174671009950975}, "ground_truth": 1}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39292233794990056, "res": {"No": 0.607057114055871, "Yes": 0.39292233794990056}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.39371688262444327, "res": {"No": 0.6062611773708685, "Yes": 0.39371688262444327}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.23835540997031165, "res": {"No": 0.7616223270158562, "Yes": 0.23835540997031165}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.33105548144259456, "res": {"No": 0.6689277645133703, "Yes": 0.33105548144259456}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4393063382663013, "res": {"No": 0.5606534727190425, "Yes": 0.4393063382663013}, "ground_truth": 1}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40225977202205043, "res": {"No": 0.5977133606282332, "Yes": 0.40225977202205043}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.32791042578937396, "res": {"No": 0.6720640704985824, "Yes": 0.32791042578937396}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2710067637301648, "res": {"No": 0.728969854266979, "Yes": 0.2710067637301648}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.375726557819939, "res": {"No": 0.6242364225991983, "Yes": 0.375726557819939}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.32592949304831575, "res": {"No": 0.6740334636297609, "Yes": 0.32592949304831575}, "ground_truth": 1}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38728737130620844, "res": {"No": 0.6126762333687643, "Yes": 0.38728737130620844}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.38674967108046576, "res": {"No": 0.613214712061367, "Yes": 0.38674967108046576}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.24591308954635518, "res": {"No": 0.7540694850360402, "Yes": 0.24591308954635518}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.35581725805292985, "res": {"No": 0.6441593342807664, "Yes": 0.35581725805292985}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3111076765116444, "res": {"No": 0.688871815185815, "Yes": 0.3111076765116444}, "ground_truth": 1}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.626559272614416, "res": {"Yes": 0.626559272614416, "No": 0.37340276237656167}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.20074500663777198, "res": {"No": 0.7992341198932285, "Yes": 0.20074500663777198}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.2552465086983889, "res": {"No": 0.7447379555451635, "Yes": 0.2552465086983889}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.31214735843380614, "res": {"No": 0.6878230629974357, "Yes": 0.31214735843380614}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.478212636103621, "res": {"No": 0.5217631133204914, "Yes": 0.478212636103621}, "ground_truth": 1}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3796449785252456, "res": {"No": 0.6203355423492266, "Yes": 0.3796449785252456}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.3520661129991103, "res": {"No": 0.6479094241149806, "Yes": 0.3520661129991103}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.3834222947334803, "res": {"No": 0.6165604793832312, "Yes": 0.3834222947334803}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.44228721510868496, "res": {"No": 0.557696484039746, "Yes": 0.44228721510868496}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5933272958200932, "res": {"Yes": 0.5933272958200932, "No": 0.4066527304074638}, "ground_truth": 1}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45925152695684823, "res": {"No": 0.5407291891765186, "Yes": 0.45925152695684823}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.338821316680571, "res": {"No": 0.6611642783358944, "Yes": 0.338821316680571}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_readability_ft_gpt35", "target_model": "human", "recognition_score": 0.39777730717655896, "res": {"No": 0.6021887166448668, "Yes": 0.39777730717655896}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_readability_ft_gpt35", "target_model": "claude", "recognition_score": 0.44403819299075215, "res": {"No": 0.5559054072782246, "Yes": 0.44403819299075215}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_readability_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4598978589112194, "res": {"No": 0.5400740063596398, "Yes": 0.4598978589112194}, "ground_truth": 1}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_readability_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3925551429638716, "res": {"No": 0.6074078150837106, "Yes": 0.3925551429638716}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_readability_ft_gpt35", "target_model": "llama", "recognition_score": 0.2224084772946717, "res": {"No": 0.7775681002643363, "Yes": 0.2224084772946717}, "ground_truth": 0}]