[{"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998578522539484, "res": {"Yes": 0.9998578522539484, "No": 0.00014212437548899632}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998845431116596, "res": {"Yes": 0.9998845431116596, "No": 0.00011541987404008215}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993917483001111, "res": {"Yes": 0.9993917483001111, "No": 0.0006082130051099599}, "ground_truth": 1}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998051804597917, "res": {"Yes": 0.9998051804597917, "No": 0.0001947293329049983}, "ground_truth": 0}, {"key": "0ae5e35dc2844afc251d082d8d5ef4be8edce58c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998879996225106, "res": {"Yes": 0.9998879996225106, "No": 0.00011188030091626666}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9543325102709392, "res": {"Yes": 0.9543325102709392, "No": 0.04566741751604954}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976439005697112, "res": {"Yes": 0.9976439005697112, "No": 0.002356089560784464}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9860668169560344, "res": {"Yes": 0.9860668169560344, "No": 0.01393305227463362}, "ground_truth": 1}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9917214356189625, "res": {"Yes": 0.9917214356189625, "No": 0.008278550386553226}, "ground_truth": 0}, {"key": "ffb817ce85d7c19720ebbf0b43b01d0da61e9c06", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999123936967488, "res": {"Yes": 0.999123936967488, "No": 0.000875992963144689}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9660697301060429, "res": {"Yes": 0.9660697301060429, "No": 0.03393007804540068}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.994523987617465, "res": {"Yes": 0.994523987617465, "No": 0.005475958226799631}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938754400486789, "res": {"Yes": 0.9938754400486789, "No": 0.006124576915239975}, "ground_truth": 1}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983940663936188, "res": {"Yes": 0.9983940663936188, "No": 0.0016058613494982412}, "ground_truth": 0}, {"key": "9bfd6a5beb4e8dec59ba6d805dc9349a401da51d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9794835970361377, "res": {"Yes": 0.9794835970361377, "No": 0.020516363330398323}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9972118521242357, "res": {"Yes": 0.9972118521242357, "No": 0.002788107022455875}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9816274347148423, "res": {"Yes": 0.9816274347148423, "No": 0.018372523839459493}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9949888877264786, "res": {"Yes": 0.9949888877264786, "No": 0.005011059044983659}, "ground_truth": 1}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966272969835613, "res": {"Yes": 0.9966272969835613, "No": 0.0033726348011222447}, "ground_truth": 0}, {"key": "dc7f4527bf654918c7a4bb19179949ecd5982c0f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9918526490444717, "res": {"Yes": 0.9918526490444717, "No": 0.008147242113321646}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9990342170688264, "res": {"Yes": 0.9990342170688264, "No": 0.000965765216880452}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9902201528097521, "res": {"Yes": 0.9902201528097521, "No": 0.009779733938087449}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993655617411157, "res": {"Yes": 0.9993655617411157, "No": 0.000634437676639875}, "ground_truth": 1}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973767770781717, "res": {"Yes": 0.9973767770781717, "No": 0.002623246253808221}, "ground_truth": 0}, {"key": "c60e77736087bc85b372a28b4724aae6dcf0b052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9829355702313612, "res": {"Yes": 0.9829355702313612, "No": 0.017064370255499377}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9985485600593004, "res": {"Yes": 0.9985485600593004, "No": 0.0014513821059742198}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954029415683043, "res": {"Yes": 0.9954029415683043, "No": 0.004597004423042666}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993715144798129, "res": {"Yes": 0.9993715144798129, "No": 0.0006283838827413283}, "ground_truth": 1}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996773410256068, "res": {"Yes": 0.9996773410256068, "No": 0.00032255157888160614}, "ground_truth": 0}, {"key": "e0953e2de7cf286ac1d87a41c62e37f21b7c7564", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981535055723129, "res": {"Yes": 0.9981535055723129, "No": 0.001846516108329975}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9891685511955826, "res": {"Yes": 0.9891685511955826, "No": 0.010831356195515096}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959244686120445, "res": {"Yes": 0.9959244686120445, "No": 0.004075538882206971}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997756278152045, "res": {"Yes": 0.9997756278152045, "No": 0.0002243395063958291}, "ground_truth": 1}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.985151988665361, "res": {"Yes": 0.985151988665361, "No": 0.01484794213654051}, "ground_truth": 0}, {"key": "8b6f228661fd5f3170dc2c2e6b353e1d6c3859ec", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998170018861058, "res": {"Yes": 0.998170018861058, "No": 0.001830004597201492}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9976032025169316, "res": {"Yes": 0.9976032025169316, "No": 0.0023967769060837044}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9812113197750598, "res": {"Yes": 0.9812113197750598, "No": 0.018788702887584812}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999478200954498, "res": {"Yes": 0.999478200954498, "No": 0.0005216900084638529}, "ground_truth": 1}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980291792226901, "res": {"Yes": 0.9980291792226901, "No": 0.001970781725436661}, "ground_truth": 0}, {"key": "2f482e021caf07e2f07baf111c36d1587758f188", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979620922594883, "res": {"Yes": 0.9979620922594883, "No": 0.0020378714962936044}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9987723114269833, "res": {"Yes": 0.9987723114269833, "No": 0.0012275956238822672}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995764461642621, "res": {"Yes": 0.9995764461642621, "No": 0.0004234744399512223}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990131679393333, "res": {"Yes": 0.9990131679393333, "No": 0.000986834117531167}, "ground_truth": 1}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989806908488607, "res": {"Yes": 0.9989806908488607, "No": 0.0010193118869317635}, "ground_truth": 0}, {"key": "dc8ce4207b91a323bf6e2fbab889efeed22aa428", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998278818035523, "res": {"Yes": 0.998278818035523, "No": 0.001721121785285275}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9989187097160919, "res": {"Yes": 0.9989187097160919, "No": 0.0010812371328968488}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971895703723842, "res": {"Yes": 0.9971895703723842, "No": 0.002810426578114046}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999834495597365, "res": {"Yes": 0.999834495597365, "No": 0.00016548516576571145}, "ground_truth": 1}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999805776363853, "res": {"Yes": 0.999805776363853, "No": 0.0001941419432890388}, "ground_truth": 0}, {"key": "22a9005fe99c5dd536a4f41de4eb59d4f633ef9e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974300253537994, "res": {"Yes": 0.9974300253537994, "No": 0.002569929017773038}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9841835487918602, "res": {"Yes": 0.9841835487918602, "No": 0.015816414275713758}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995782334885119, "res": {"Yes": 0.9995782334885119, "No": 0.00042175811308801795}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995077338027821, "res": {"Yes": 0.9995077338027821, "No": 0.0004921615705279083}, "ground_truth": 1}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987192793236308, "res": {"Yes": 0.9987192793236308, "No": 0.0012806872897972213}, "ground_truth": 0}, {"key": "1b00b2d2edef396855ad392f08a6e74550af569b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9966758497619349, "res": {"Yes": 0.9966758497619349, "No": 0.003324184161312625}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.99796268684548, "res": {"Yes": 0.99796268684548, "No": 0.002037273876229008}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980475829506329, "res": {"Yes": 0.9980475829506329, "No": 0.0019523502026255518}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991599954015585, "res": {"Yes": 0.9991599954015585, "No": 0.0008399296664912978}, "ground_truth": 1}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990496877829326, "res": {"Yes": 0.9990496877829326, "No": 0.0009503029780044444}, "ground_truth": 0}, {"key": "9b43472077873c45e7f278418885ad028eb993e3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967335205900602, "res": {"Yes": 0.9967335205900602, "No": 0.0032664255366808012}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9992740115072616, "res": {"Yes": 0.9992740115072616, "No": 0.0007259587956885855}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994086577118094, "res": {"Yes": 0.9994086577118094, "No": 0.0005913276469591115}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997816081028802, "res": {"Yes": 0.997816081028802, "No": 0.0021839131246091767}, "ground_truth": 1}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996636407814501, "res": {"Yes": 0.9996636407814501, "No": 0.0003363451813137734}, "ground_truth": 0}, {"key": "281347d0e99bc099d6991a6a60e30dc13d558216", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.996153783312469, "res": {"Yes": 0.996153783312469, "No": 0.0038461985810891086}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9995883579178599, "res": {"Yes": 0.9995883579178599, "No": 0.0004115981700322707}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994957075981784, "res": {"Yes": 0.9994957075981784, "No": 0.0005042133832643278}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995766844533196, "res": {"Yes": 0.9995766844533196, "No": 0.00042319021463735383}, "ground_truth": 1}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996547035685396, "res": {"Yes": 0.9996547035685396, "No": 0.00034524163320063205}, "ground_truth": 0}, {"key": "703d248cce913a6036347aabd2ba011a7f660bf4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992691276573997, "res": {"Yes": 0.9992691276573997, "No": 0.0007307899394821261}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9988305781073296, "res": {"Yes": 0.9988305781073296, "No": 0.001169390039353651}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952426870310949, "res": {"Yes": 0.9952426870310949, "No": 0.004757293808680634}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9936391672945429, "res": {"Yes": 0.9936391672945429, "No": 0.006360800784776473}, "ground_truth": 1}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976252777248361, "res": {"Yes": 0.9976252777248361, "No": 0.0023747146406964065}, "ground_truth": 0}, {"key": "517b86691dda04f41123354c1b7a7bfe1290eadc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9803983777610894, "res": {"Yes": 0.9803983777610894, "No": 0.01960163495800226}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.951399535701649, "res": {"Yes": 0.951399535701649, "No": 0.04860031337748882}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.992668272213278, "res": {"Yes": 0.992668272213278, "No": 0.007331655030217951}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967810163729215, "res": {"Yes": 0.9967810163729215, "No": 0.0032190058800415054}, "ground_truth": 1}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932857261439888, "res": {"Yes": 0.9932857261439888, "No": 0.006714264405374872}, "ground_truth": 0}, {"key": "7b361feeebfb078b80831d40d2ad8640c361e049", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9902867859754289, "res": {"Yes": 0.9902867859754289, "No": 0.009713097951844054}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.2289925290948567, "res": {"No": 0.7710071745112815, "Yes": 0.2289925290948567}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982808408499414, "res": {"Yes": 0.9982808408499414, "No": 0.0017191049525912493}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979223253602089, "res": {"Yes": 0.9979223253602089, "No": 0.0020776170314362598}, "ground_truth": 1}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990582512739377, "res": {"Yes": 0.9990582512739377, "No": 0.0009417294808020758}, "ground_truth": 0}, {"key": "7d8e196095e4561ee4f01c650101f5591fce4a8d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980165905612294, "res": {"Yes": 0.9980165905612294, "No": 0.0019834351763319043}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9984868802218221, "res": {"Yes": 0.9984868802218221, "No": 0.0015131109738077554}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9905860354398471, "res": {"Yes": 0.9905860354398471, "No": 0.009413871368012814}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999688062262696, "res": {"Yes": 0.999688062262696, "No": 0.00031183885311852346}, "ground_truth": 1}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989530898931649, "res": {"Yes": 0.9989530898931649, "No": 0.001046868135611669}, "ground_truth": 0}, {"key": "ae03390949407f55c8d8f794896af3020d180b12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983755336760918, "res": {"Yes": 0.9983755336760918, "No": 0.0016244012380403946}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9691384229035422, "res": {"Yes": 0.9691384229035422, "No": 0.030861457615396996}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9865338025275646, "res": {"Yes": 0.9865338025275646, "No": 0.013466104885184405}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989258469158782, "res": {"Yes": 0.9989258469158782, "No": 0.0010741205986001816}, "ground_truth": 1}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992006967131954, "res": {"Yes": 0.9992006967131954, "No": 0.0007992610057677152}, "ground_truth": 0}, {"key": "4160464d3fcba547cb1664e54ce51ad06f78b773", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942118606993476, "res": {"Yes": 0.9942118606993476, "No": 0.005788063495940544}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9962143563844358, "res": {"Yes": 0.9962143563844358, "No": 0.0037856881456187837}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9835356448034167, "res": {"Yes": 0.9835356448034167, "No": 0.016464317756462576}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952327619233639, "res": {"Yes": 0.9952327619233639, "No": 0.004767237415666219}, "ground_truth": 1}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9955431682826706, "res": {"Yes": 0.9955431682826706, "No": 0.004456774414733015}, "ground_truth": 0}, {"key": "72b396634cc46f29b5ef146a61073ee7e7fa3cdc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956381652541632, "res": {"Yes": 0.9956381652541632, "No": 0.00436178020960371}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9946532351035413, "res": {"Yes": 0.9946532351035413, "No": 0.005346703907462537}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993823516633125, "res": {"Yes": 0.9993823516633125, "No": 0.0006176272115945466}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990037711657679, "res": {"Yes": 0.9990037711657679, "No": 0.00099621665393846}, "ground_truth": 1}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998443879188161, "res": {"Yes": 0.9998443879188161, "No": 0.0001555970352600742}, "ground_truth": 0}, {"key": "b9b09b290cdf4d74cb64684a6cae8b6832645607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997388174840934, "res": {"Yes": 0.9997388174840934, "No": 0.00026108566137160086}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9897149793015291, "res": {"Yes": 0.9897149793015291, "No": 0.01028492138049766}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996233789595161, "res": {"Yes": 0.9996233789595161, "No": 0.0003766146657452973}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961219581020956, "res": {"Yes": 0.9961219581020956, "No": 0.0038779872885638176}, "ground_truth": 1}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938480035019893, "res": {"Yes": 0.9938480035019893, "No": 0.006151920312299251}, "ground_truth": 0}, {"key": "9e49d58fa5ab7f3e4356cb61337605ac007c49ca", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990412360081817, "res": {"Yes": 0.9990412360081817, "No": 0.0009586619958541121}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9999645227323332, "res": {"Yes": 0.9999645227323332, "No": 3.5358420157383036e-05}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991492797977913, "res": {"Yes": 0.9991492797977913, "No": 0.0008506158001648618}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994576006517389, "res": {"Yes": 0.9994576006517389, "No": 0.0005423283110317595}, "ground_truth": 1}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997103393197577, "res": {"Yes": 0.9997103393197577, "No": 0.00028956217997205515}, "ground_truth": 0}, {"key": "47fe0e2bba8206014fb53a7d5247efd1e316e087", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995582232321059, "res": {"Yes": 0.9995582232321059, "No": 0.0004416881799842604}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9967511654213868, "res": {"Yes": 0.9967511654213868, "No": 0.0032487758991660886}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9846906008158954, "res": {"Yes": 0.9846906008158954, "No": 0.015309347338086867}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909189490799164, "res": {"Yes": 0.9909189490799164, "No": 0.009080972396294197}, "ground_truth": 1}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978875411883722, "res": {"Yes": 0.9978875411883722, "No": 0.0021124032470390677}, "ground_truth": 0}, {"key": "f54f84affaebd1d86011ad31adbf9fac4754a8f2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998825227486137, "res": {"Yes": 0.998825227486137, "No": 0.001174681772479437}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.997087511348253, "res": {"Yes": 0.997087511348253, "No": 0.002912466390206979}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9943011838104161, "res": {"Yes": 0.9943011838104161, "No": 0.005698732484557624}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981972119780178, "res": {"Yes": 0.9981972119780178, "No": 0.0018027824536994068}, "ground_truth": 1}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979061803050955, "res": {"Yes": 0.9979061803050955, "No": 0.0020937503400059693}, "ground_truth": 0}, {"key": "bc4992871e25e6584936af456ad4b819dcc52f27", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994398564390128, "res": {"Yes": 0.9994398564390128, "No": 0.0005601040432356213}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9995776376900835, "res": {"Yes": 0.9995776376900835, "No": 0.0004222650333288036}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999113611986508, "res": {"Yes": 0.9999113611986508, "No": 8.851000159171858e-05}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979008345360062, "res": {"Yes": 0.9979008345360062, "No": 0.0020991214057744117}, "ground_truth": 1}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998080407965261, "res": {"Yes": 0.9998080407965261, "No": 0.00019186088612174458}, "ground_truth": 0}, {"key": "271416bf0f15c52a4417339de78dcecf378b3950", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999714363229496, "res": {"Yes": 0.9999714363229496, "No": 2.844400898668419e-05}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9988541274139425, "res": {"Yes": 0.9988541274139425, "No": 0.0011458632453105369}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9755643362674189, "res": {"Yes": 0.9755643362674189, "No": 0.024435634053959827}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996978302924648, "res": {"Yes": 0.9996978302924648, "No": 0.00030204244624063996}, "ground_truth": 1}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994041343484907, "res": {"Yes": 0.9994041343484907, "No": 0.0005957651572556299}, "ground_truth": 0}, {"key": "0add6b43e18072837f0bef41031d6f92f740d625", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.996096657224515, "res": {"Yes": 0.996096657224515, "No": 0.003903385599475144}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9945288266846042, "res": {"Yes": 0.9945288266846042, "No": 0.00547116338783904}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9806850391275114, "res": {"Yes": 0.9806850391275114, "No": 0.019314951381577274}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944155284584804, "res": {"Yes": 0.9944155284584804, "No": 0.005584463008301774}, "ground_truth": 1}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99916368027045, "res": {"Yes": 0.99916368027045, "No": 0.0008362942777010502}, "ground_truth": 0}, {"key": "7060e3feb685bda7b5102e1cc8e9b35da37ca2a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9800541771886484, "res": {"Yes": 0.9800541771886484, "No": 0.019945817634604773}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9830377408812611, "res": {"Yes": 0.9830377408812611, "No": 0.016962232907544096}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9735762174903438, "res": {"Yes": 0.9735762174903438, "No": 0.026423664176707783}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9904440500612872, "res": {"Yes": 0.9904440500612872, "No": 0.00955585713426745}, "ground_truth": 1}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939909773295599, "res": {"Yes": 0.9939909773295599, "No": 0.006009015825505591}, "ground_truth": 0}, {"key": "04fbd3088e8610dd4d5bea709e7cc5ee3e53710d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982756159615217, "res": {"Yes": 0.9982756159615217, "No": 0.0017243105319844681}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9927442668562926, "res": {"Yes": 0.9927442668562926, "No": 0.007255699362352779}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9923986224152771, "res": {"Yes": 0.9923986224152771, "No": 0.007601318195147978}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998737042159841, "res": {"Yes": 0.9998737042159841, "No": 0.0001261889715934789}, "ground_truth": 1}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991847471992908, "res": {"Yes": 0.9991847471992908, "No": 0.000815173976016051}, "ground_truth": 0}, {"key": "b198f0356c0f0cb09aba4d7d5541dd7c5ce91678", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996643557411417, "res": {"Yes": 0.9996643557411417, "No": 0.00033553039819791746}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9992961565141147, "res": {"Yes": 0.9992961565141147, "No": 0.0007037460438877953}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.99917546791355, "res": {"Yes": 0.99917546791355, "No": 0.0008245138648794048}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989044353696116, "res": {"Yes": 0.9989044353696116, "No": 0.001095503723532777}, "ground_truth": 1}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985014982766018, "res": {"Yes": 0.9985014982766018, "No": 0.0014984110984417598}, "ground_truth": 0}, {"key": "0f60442453e038c6bbe3aa525bedf2a97f459e04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998373560279651, "res": {"Yes": 0.9998373560279651, "No": 0.00016256681036206917}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.909967626844679, "res": {"Yes": 0.909967626844679, "No": 0.09003234358898617}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971087359777518, "res": {"Yes": 0.9971087359777518, "No": 0.0028912191922144874}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980080433841444, "res": {"Yes": 0.9980080433841444, "No": 0.001991993665090976}, "ground_truth": 1}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994065133429176, "res": {"Yes": 0.9994065133429176, "No": 0.000593418900154806}, "ground_truth": 0}, {"key": "79b423b90af6b3381ebfa34a7382ca472bdb5b35", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994067516414351, "res": {"Yes": 0.9994067516414351, "No": 0.000593136144093114}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9887943933010848, "res": {"Yes": 0.9887943933010848, "No": 0.011205477026380058}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9840816074188483, "res": {"Yes": 0.9840816074188483, "No": 0.01591839373020796}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966987066916168, "res": {"Yes": 0.9966987066916168, "No": 0.003301260215248422}, "ground_truth": 1}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994721284232504, "res": {"Yes": 0.9994721284232504, "No": 0.000527802340214996}, "ground_truth": 0}, {"key": "3e5a3a2dddee7ebebe57c12d0ace97a24cbabd72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.988368932968671, "res": {"Yes": 0.988368932968671, "No": 0.011630992226135714}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998038694761064, "res": {"Yes": 0.9998038694761064, "No": 0.00019608800390275886}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991999819851927, "res": {"Yes": 0.9991999819851927, "No": 0.0007999958642477676}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998855437112333, "res": {"Yes": 0.998855437112333, "No": 0.0011445582777889552}, "ground_truth": 1}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998020817783959, "res": {"Yes": 0.9998020817783959, "No": 0.000197873622747916}, "ground_truth": 0}, {"key": "c623e4914c04811d354fa137e3bf7a00980ef7d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997179661490261, "res": {"Yes": 0.9997179661490261, "No": 0.00028201508374476583}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.07181032579264925, "res": {"No": 0.9281896804922576, "Yes": 0.07181032579264925}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995360723577238, "res": {"Yes": 0.9995360723577238, "No": 0.00046384899523936547}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9915428942314144, "res": {"Yes": 0.9915428942314144, "No": 0.008457006581088174}, "ground_truth": 1}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996954469456997, "res": {"Yes": 0.9996954469456997, "No": 0.0003044477738422443}, "ground_truth": 0}, {"key": "d5ef1fb6f2df20e46fb032b9e318bc8555283e78", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9953995157954356, "res": {"Yes": 0.9953995157954356, "No": 0.004600427707123436}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9992358122802784, "res": {"Yes": 0.9992358122802784, "No": 0.0007641052626122691}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.996661046245477, "res": {"Yes": 0.996661046245477, "No": 0.0033389279385484657}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937618231250925, "res": {"Yes": 0.9937618231250925, "No": 0.00623816132173467}, "ground_truth": 1}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958383961733067, "res": {"Yes": 0.9958383961733067, "No": 0.00416163605322013}, "ground_truth": 0}, {"key": "e1a36c50adadc5db9ffe6fd2c5e275d522902193", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997769387718559, "res": {"Yes": 0.9997769387718559, "No": 0.00022301868517319827}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9892924463800108, "res": {"Yes": 0.9892924463800108, "No": 0.010707449123164238}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982821498964662, "res": {"Yes": 0.9982821498964662, "No": 0.001717788965974144}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9920988723066435, "res": {"Yes": 0.9920988723066435, "No": 0.00790109754035549}, "ground_truth": 1}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9872825736815942, "res": {"Yes": 0.9872825736815942, "No": 0.012717392935218246}, "ground_truth": 0}, {"key": "bad69c9a3c7d84a4717705a3063dd7367243772b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992127193181871, "res": {"Yes": 0.9992127193181871, "No": 0.0007872151240097713}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9987966819684584, "res": {"Yes": 0.9987966819684584, "No": 0.001203256677792585}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9906460499315428, "res": {"Yes": 0.9906460499315428, "No": 0.009353833684350731}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9905711737890424, "res": {"Yes": 0.9905711737890424, "No": 0.009428789099187944}, "ground_truth": 1}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994604563360588, "res": {"Yes": 0.9994604563360588, "No": 0.0005395173577562789}, "ground_truth": 0}, {"key": "4b40ad1f0e59b139e9a1466859ee08a048ca8d2c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9888580376171815, "res": {"Yes": 0.9888580376171815, "No": 0.011141876629590127}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9991981990143356, "res": {"Yes": 0.9991981990143356, "No": 0.0008017754314250743}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999737011318213, "res": {"Yes": 0.9999737011318213, "No": 2.6193690416438194e-05}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998934823934031, "res": {"Yes": 0.9998934823934031, "No": 0.00010641595918119369}, "ground_truth": 1}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999838786246334, "res": {"Yes": 0.999838786246334, "No": 0.00016116610769957717}, "ground_truth": 0}, {"key": "c17a30b4c0f8f58e3afa6279a39470f65a179f6b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995210634667341, "res": {"Yes": 0.9995210634667341, "No": 0.0004788982044232298}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9644571874089816, "res": {"Yes": 0.9644571874089816, "No": 0.035542696033579374}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976823446202387, "res": {"Yes": 0.9976823446202387, "No": 0.0023175990996337427}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989523755420657, "res": {"Yes": 0.9989523755420657, "No": 0.001047598685609284}, "ground_truth": 1}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975466227613541, "res": {"Yes": 0.9975466227613541, "No": 0.002453391318329778}, "ground_truth": 0}, {"key": "6f579aad2d1f601b17e01057cd4b5e4942627230", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942876409239432, "res": {"Yes": 0.9942876409239432, "No": 0.0057123658878511585}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9970125084607033, "res": {"Yes": 0.9970125084607033, "No": 0.0029874903256676315}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994905843562311, "res": {"Yes": 0.9994905843562311, "No": 0.0005093432536673559}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999303130782463, "res": {"Yes": 0.9999303130782463, "No": 6.963202271193242e-05}, "ground_truth": 1}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971890951119483, "res": {"Yes": 0.9971890951119483, "No": 0.0028108748769041327}, "ground_truth": 0}, {"key": "7551f8e13485bb1738c16eccad343a14a47b2903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963627238728152, "res": {"Yes": 0.9963627238728152, "No": 0.003637259738272616}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9978798241547007, "res": {"Yes": 0.9978798241547007, "No": 0.002120201813037387}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986822953362086, "res": {"Yes": 0.9986822953362086, "No": 0.0013177132909335276}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992460449670382, "res": {"Yes": 0.9992460449670382, "No": 0.0007539297982953567}, "ground_truth": 1}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981487573673804, "res": {"Yes": 0.9981487573673804, "No": 0.0018512327693528572}, "ground_truth": 0}, {"key": "257d8d38863175ec9401d0532963fb71a47f0734", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996821039696543, "res": {"Yes": 0.9996821039696543, "No": 0.0003178166226019444}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9984419563122585, "res": {"Yes": 0.9984419563122585, "No": 0.001557984936602138}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953501521713041, "res": {"Yes": 0.9953501521713041, "No": 0.0046498993722191816}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.994989121250398, "res": {"Yes": 0.994989121250398, "No": 0.005010868628357888}, "ground_truth": 1}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998113778613928, "res": {"Yes": 0.9998113778613928, "No": 0.0001884989728806636}, "ground_truth": 0}, {"key": "aa596e10dfe78dd875a7e15bfd53138bc8bde471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974555226841975, "res": {"Yes": 0.9974555226841975, "No": 0.00254446593429763}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9926905913198171, "res": {"Yes": 0.9926905913198171, "No": 0.007309338513092493}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991370306124431, "res": {"Yes": 0.9991370306124431, "No": 0.0008629230815462122}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993706806045691, "res": {"Yes": 0.9993706806045691, "No": 0.000629218049980968}, "ground_truth": 1}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999745845032847, "res": {"Yes": 0.999745845032847, "No": 0.00025406637657163824}, "ground_truth": 0}, {"key": "a0dff972172125f32b79977a47b0d250be8f71e4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999613254165458, "res": {"Yes": 0.999613254165458, "No": 0.00038665837760121176}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9966132033779557, "res": {"Yes": 0.9966132033779557, "No": 0.0033868194951412354}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972184905856314, "res": {"Yes": 0.9972184905856314, "No": 0.0027815178807881606}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983518876312787, "res": {"Yes": 0.9983518876312787, "No": 0.0016480435860478932}, "ground_truth": 1}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954647270514029, "res": {"Yes": 0.9954647270514029, "No": 0.004535209501046152}, "ground_truth": 0}, {"key": "faac3a6337907b1552b7068c8701cae665c4885c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999014835093899, "res": {"Yes": 0.999014835093899, "No": 0.000985087094390549}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9875771077082974, "res": {"Yes": 0.9875771077082974, "No": 0.01242275260774299}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.966565415256209, "res": {"Yes": 0.966565415256209, "No": 0.033434566132741636}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984614425948504, "res": {"Yes": 0.9984614425948504, "No": 0.0015385014818872637}, "ground_truth": 1}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996983069884919, "res": {"Yes": 0.9996983069884919, "No": 0.0003016051036672524}, "ground_truth": 0}, {"key": "c741b211364d761605050776064a506d24378d10", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9817652982621472, "res": {"Yes": 0.9817652982621472, "No": 0.018234680064653116}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9980685896780097, "res": {"Yes": 0.9980685896780097, "No": 0.0019313983591440122}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999192279722359, "res": {"Yes": 0.9999192279722359, "No": 8.065941671085583e-05}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997788456082386, "res": {"Yes": 0.9997788456082386, "No": 0.00022106517288253148}, "ground_truth": 1}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999909692497968, "res": {"Yes": 0.999909692497968, "No": 9.027969359154873e-05}, "ground_truth": 0}, {"key": "a062b8ba08369be2364a863020a28de63a3cbf86", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998044653793863, "res": {"Yes": 0.9998044653793863, "No": 0.00019546074334626714}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9997066487358987, "res": {"Yes": 0.9997066487358987, "No": 0.00029323154648444144}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9937481649564547, "res": {"Yes": 0.9937481649564547, "No": 0.006251820985438939}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999700809486411, "res": {"Yes": 0.999700809486411, "No": 0.0002990909052036925}, "ground_truth": 1}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989113344268095, "res": {"Yes": 0.9989113344268095, "No": 0.0010885711655060867}, "ground_truth": 0}, {"key": "f11102404d23702fe398b1d690d13418703e617a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997799181915551, "res": {"Yes": 0.9997799181915551, "No": 0.0002200187033496103}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9751528851450285, "res": {"Yes": 0.9751528851450285, "No": 0.024847006412349257}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987662433043617, "res": {"Yes": 0.9987662433043617, "No": 0.0012337568685725784}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956541221762143, "res": {"Yes": 0.9956541221762143, "No": 0.0043458713479090135}, "ground_truth": 1}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9740703665804649, "res": {"Yes": 0.9740703665804649, "No": 0.025929589775905454}, "ground_truth": 0}, {"key": "62481fda7e7b9714b994617909349550707fe8a7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926987016350788, "res": {"Yes": 0.9926987016350788, "No": 0.007301233299211077}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9992136721878748, "res": {"Yes": 0.9992136721878748, "No": 0.0007862317198603993}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988695916933656, "res": {"Yes": 0.9988695916933656, "No": 0.0011303383119050888}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995542949160343, "res": {"Yes": 0.9995542949160343, "No": 0.00044567231308983486}, "ground_truth": 1}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997664512367594, "res": {"Yes": 0.9997664512367594, "No": 0.00023352229584411394}, "ground_truth": 0}, {"key": "13943ef2d9c3b737c9a900f0cdb32a9c39121e6f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993988925375228, "res": {"Yes": 0.9993988925375228, "No": 0.0006010534453571062}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9649410119736093, "res": {"Yes": 0.9649410119736093, "No": 0.03505892015071609}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9660646128482356, "res": {"Yes": 0.9660646128482356, "No": 0.03393533761510383}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987012020690486, "res": {"Yes": 0.9987012020690486, "No": 0.0012987094998281406}, "ground_truth": 1}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.983289394376148, "res": {"Yes": 0.983289394376148, "No": 0.016710545362722776}, "ground_truth": 0}, {"key": "0c25e0cd62589fde1b378606febbcc5a2bafc770", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.984791170432975, "res": {"Yes": 0.984791170432975, "No": 0.015208805646828598}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9948571923506608, "res": {"Yes": 0.9948571923506608, "No": 0.005142772232833558}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998924096782539, "res": {"Yes": 0.9998924096782539, "No": 0.00010751426731431946}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996320776000522, "res": {"Yes": 0.9996320776000522, "No": 0.0003678964666514869}, "ground_truth": 1}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986400934121323, "res": {"Yes": 0.9986400934121323, "No": 0.0013598471163522895}, "ground_truth": 0}, {"key": "a86bc16fad35d84e988af4cc64bf14877337d47e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998800139029076, "res": {"Yes": 0.9998800139029076, "No": 0.0001199558259544728}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9961893873814799, "res": {"Yes": 0.9961893873814799, "No": 0.003810570742048452}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981413986426074, "res": {"Yes": 0.9981413986426074, "No": 0.001858606993629506}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984035765417527, "res": {"Yes": 0.9984035765417527, "No": 0.0015963913551816768}, "ground_truth": 1}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983112501454401, "res": {"Yes": 0.9983112501454401, "No": 0.001688666356071524}, "ground_truth": 0}, {"key": "0f937bcc8cd07bab2d5aa9a4492d3c2430063b67", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996978302924648, "res": {"Yes": 0.9996978302924648, "No": 0.0003020723499230258}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9967055719759526, "res": {"Yes": 0.9967055719759526, "No": 0.003294440263384048}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912727033871578, "res": {"Yes": 0.9912727033871578, "No": 0.008727275871820242}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944609111382903, "res": {"Yes": 0.9944609111382903, "No": 0.005539034895428669}, "ground_truth": 1}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977940127741416, "res": {"Yes": 0.9977940127741416, "No": 0.002205944317183495}, "ground_truth": 0}, {"key": "a6c0617d92807f05f02d767261c45b616382e499", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969709105768164, "res": {"Yes": 0.9969709105768164, "No": 0.003029114337630219}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9714841541526799, "res": {"Yes": 0.9714841541526799, "No": 0.028515596379600626}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9614253462244232, "res": {"Yes": 0.9614253462244232, "No": 0.038574405206357756}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9686922393392763, "res": {"Yes": 0.9686922393392763, "No": 0.0313076907736492}, "ground_truth": 1}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954836256309031, "res": {"Yes": 0.9954836256309031, "No": 0.004516373184667778}, "ground_truth": 0}, {"key": "4f45a0576d353fa9b5b9d1c8e3cc0e6a1cd4897b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9836232905861455, "res": {"Yes": 0.9836232905861455, "No": 0.016376645728615337}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9971574453309408, "res": {"Yes": 0.9971574453309408, "No": 0.0028425728608590583}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966415057012307, "res": {"Yes": 0.9966415057012307, "No": 0.003358432270677769}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999165109515167, "res": {"Yes": 0.999165109515167, "No": 0.0008347908863388217}, "ground_truth": 1}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9743851327440484, "res": {"Yes": 0.9743851327440484, "No": 0.025614773605153223}, "ground_truth": 0}, {"key": "f6d43a207c2b3c48e787087b345c23d2fb1dc92f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988166621970067, "res": {"Yes": 0.9988166621970067, "No": 0.0011832617014276286}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9330218493755968, "res": {"Yes": 0.9330218493755968, "No": 0.06697804499057686}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9856808838258673, "res": {"Yes": 0.9856808838258673, "No": 0.014318984997852204}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9663517370646418, "res": {"Yes": 0.9663517370646418, "No": 0.03364825101574958}, "ground_truth": 1}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9880809059239131, "res": {"Yes": 0.9880809059239131, "No": 0.011919018039200431}, "ground_truth": 0}, {"key": "e0127487aee3a00fe164d72eb697221079518471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963318495612706, "res": {"Yes": 0.9963318495612706, "No": 0.003668177258871431}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9972590433998094, "res": {"Yes": 0.9972590433998094, "No": 0.0027409511055502393}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.8896655619199638, "res": {"Yes": 0.8896655619199638, "No": 0.11033439168588392}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989604654907416, "res": {"Yes": 0.9989604654907416, "No": 0.0010394786391869973}, "ground_truth": 1}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993157967738484, "res": {"Yes": 0.9993157967738484, "No": 0.0006841481980459971}, "ground_truth": 0}, {"key": "2f3af8decce1c0a6cd22b21df96acd40b14bb62b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999440206399028, "res": {"Yes": 0.9999440206399028, "No": 5.595017474366764e-05}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.999965953125608, "res": {"Yes": 0.999965953125608, "No": 3.393675203490969e-05}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996096794548915, "res": {"Yes": 0.9996096794548915, "No": 0.00039021320060159305}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999475965531086, "res": {"Yes": 0.9999475965531086, "No": 5.231946899735269e-05}, "ground_truth": 1}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992408076324679, "res": {"Yes": 0.9992408076324679, "No": 0.000759192871653033}, "ground_truth": 0}, {"key": "13a366f303e6ea2f07c353a9f5fb49a44840f808", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999390143796986, "res": {"Yes": 0.9999390143796986, "No": 6.087244187693346e-05}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9953073918519363, "res": {"Yes": 0.9953073918519363, "No": 0.004692625711429557}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987839572799075, "res": {"Yes": 0.9987839572799075, "No": 0.0012160553237112832}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983754147695727, "res": {"Yes": 0.9983754147695727, "No": 0.0016245311953376597}, "ground_truth": 1}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994485498045803, "res": {"Yes": 0.9994485498045803, "No": 0.000551345171165294}, "ground_truth": 0}, {"key": "ee36a8b5db09b9e9a6d462116539ffc095d5c017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994336650282237, "res": {"Yes": 0.9994336650282237, "No": 0.0005662814170823509}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9839577518139975, "res": {"Yes": 0.9839577518139975, "No": 0.016042267840067972}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9879970016560077, "res": {"Yes": 0.9879970016560077, "No": 0.012002963516203911}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998406931906776, "res": {"Yes": 0.9998406931906776, "No": 0.0001592401687677623}, "ground_truth": 1}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998967011200637, "res": {"Yes": 0.998967011200637, "No": 0.0010329159056996623}, "ground_truth": 0}, {"key": "d3b3bcf90226a694361cb27f1a4d481c79b7368a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999908619765428, "res": {"Yes": 0.999908619765428, "No": 9.133541029926259e-05}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9933228916180056, "res": {"Yes": 0.9933228916180056, "No": 0.006677070069516124}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956807134659404, "res": {"Yes": 0.9956807134659404, "No": 0.00431922928021605}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992602123179406, "res": {"Yes": 0.9992602123179406, "No": 0.000739699378186585}, "ground_truth": 1}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9737817903376766, "res": {"Yes": 0.9737817903376766, "No": 0.02621807029596677}, "ground_truth": 0}, {"key": "e5e837f2cc75fc44ebf826822aa4c9dc59344b8b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9832574635548187, "res": {"Yes": 0.9832574635548187, "No": 0.016742543232205625}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.972198304845152, "res": {"Yes": 0.972198304845152, "No": 0.027801602946549306}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946262296401693, "res": {"Yes": 0.9946262296401693, "No": 0.005373814432475167}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9857554770634187, "res": {"Yes": 0.9857554770634187, "No": 0.014244441764093037}, "ground_truth": 1}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9802055739297055, "res": {"Yes": 0.9802055739297055, "No": 0.019794404148272014}, "ground_truth": 0}, {"key": "1317a869cc1d6d534a93eaad67fb6550aa155ae6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9916504983384029, "res": {"Yes": 0.9916504983384029, "No": 0.008349441738659613}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9971419143258178, "res": {"Yes": 0.9971419143258178, "No": 0.0028580891694890242}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970396443200176, "res": {"Yes": 0.9970396443200176, "No": 0.0029603783436903193}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996631641019533, "res": {"Yes": 0.9996631641019533, "No": 0.0003367377905667445}, "ground_truth": 1}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999427094741649, "res": {"Yes": 0.9999427094741649, "No": 5.7169329553526735e-05}, "ground_truth": 0}, {"key": "f856ca4cd15992e0e88ae9113bf31b2a5ab55072", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982536382673185, "res": {"Yes": 0.9982536382673185, "No": 0.0017463349525765495}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9975492352396254, "res": {"Yes": 0.9975492352396254, "No": 0.0024507595060425267}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9935146502493447, "res": {"Yes": 0.9935146502493447, "No": 0.006485368755454883}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953581840822283, "res": {"Yes": 0.9953581840822283, "No": 0.004641820224066239}, "ground_truth": 1}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970171286273725, "res": {"Yes": 0.9970171286273725, "No": 0.0029828471951138017}, "ground_truth": 0}, {"key": "438e8eaa1c64cb59288f9954355d0100181d19a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999436630499856, "res": {"Yes": 0.9999436630499856, "No": 5.6295947830522425e-05}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9966965719646129, "res": {"Yes": 0.9966965719646129, "No": 0.003303433159290246}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.997381644388458, "res": {"Yes": 0.997381644388458, "No": 0.0026183613390263114}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996133733593495, "res": {"Yes": 0.9996133733593495, "No": 0.00038661569287256475}, "ground_truth": 1}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999622306544138, "res": {"Yes": 0.999622306544138, "No": 0.00037763867716714497}, "ground_truth": 0}, {"key": "470618f9fd48a5d134a44b5de166b625f596a0b3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990897853968517, "res": {"Yes": 0.9990897853968517, "No": 0.0009101765193107275}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.994178740574203, "res": {"Yes": 0.994178740574203, "No": 0.005821193322651437}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980028197239151, "res": {"Yes": 0.9980028197239151, "No": 0.0019971036960154333}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986255923644843, "res": {"Yes": 0.9986255923644843, "No": 0.001374362213775034}, "ground_truth": 1}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968703291620299, "res": {"Yes": 0.9968703291620299, "No": 0.00312964092956381}, "ground_truth": 0}, {"key": "751e8f720d24c57e79cf63d9a3f72cd80c5d9619", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975276413255451, "res": {"Yes": 0.9975276413255451, "No": 0.0024722846409502824}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9997278536785302, "res": {"Yes": 0.9997278536785302, "No": 0.0002720700467516118}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994681967574983, "res": {"Yes": 0.9994681967574983, "No": 0.0005317266587525775}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961845444185451, "res": {"Yes": 0.9961845444185451, "No": 0.0038154105704386277}, "ground_truth": 1}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987642265975993, "res": {"Yes": 0.9987642265975993, "No": 0.0012356950174873928}, "ground_truth": 0}, {"key": "6a985a1090f146976d33c8b33342320a65a50211", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969301578310553, "res": {"Yes": 0.9969301578310553, "No": 0.0030698548457975677}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9968535028306916, "res": {"Yes": 0.9968535028306916, "No": 0.003146442907266935}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995650148634783, "res": {"Yes": 0.9995650148634783, "No": 0.0004349181958837008}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992618762974242, "res": {"Yes": 0.9992618762974242, "No": 0.0007380488080363876}, "ground_truth": 1}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991973652837062, "res": {"Yes": 0.9991973652837062, "No": 0.0008026345929109822}, "ground_truth": 0}, {"key": "4640d2f968f31bcf384d8253e055611b10f4a38b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997271386334359, "res": {"Yes": 0.9997271386334359, "No": 0.0002728430053976513}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9995423872866515, "res": {"Yes": 0.9995423872866515, "No": 0.0004574957330311779}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950776422060715, "res": {"Yes": 0.9950776422060715, "No": 0.004922306783119669}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990292190130443, "res": {"Yes": 0.9990292190130443, "No": 0.0009706797099088429}, "ground_truth": 1}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994208020399346, "res": {"Yes": 0.9994208020399346, "No": 0.0005791967083536442}, "ground_truth": 0}, {"key": "22e0c9ba3465840583a5fd79714d2f7663c7e3a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993447255861131, "res": {"Yes": 0.9993447255861131, "No": 0.0006552026374640399}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9957361467447886, "res": {"Yes": 0.9957361467447886, "No": 0.004263889956613681}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9861920115087148, "res": {"Yes": 0.9861920115087148, "No": 0.013807932957366587}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9521041327803864, "res": {"Yes": 0.9521041327803864, "No": 0.04789578376724689}, "ground_truth": 1}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958490361659077, "res": {"Yes": 0.9958490361659077, "No": 0.004150995890143573}, "ground_truth": 0}, {"key": "10dafcc21761c60f8fc5bd832daf8f21cf0fc66d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9808885729855605, "res": {"Yes": 0.9808885729855605, "No": 0.019111374454574746}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9968340848120104, "res": {"Yes": 0.9968340848120104, "No": 0.003165909192581116}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9896743404594107, "res": {"Yes": 0.9896743404594107, "No": 0.010325579663165326}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990174513173748, "res": {"Yes": 0.9990174513173748, "No": 0.0009824849818367751}, "ground_truth": 1}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986147708168867, "res": {"Yes": 0.9986147708168867, "No": 0.00138517374438679}, "ground_truth": 0}, {"key": "75da85a72e0028e1b07c65f6b771a1eaa20b4e04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978163191077474, "res": {"Yes": 0.9978163191077474, "No": 0.002183605214566775}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9777140696642534, "res": {"Yes": 0.9777140696642534, "No": 0.022285598863572398}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989922309406595, "res": {"Yes": 0.9989922309406595, "No": 0.0010077709170428193}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994011560186737, "res": {"Yes": 0.9994011560186737, "No": 0.0005987367134293606}, "ground_truth": 1}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946855474360486, "res": {"Yes": 0.9946855474360486, "No": 0.00531444337425442}, "ground_truth": 0}, {"key": "9166b4c77e919f7a7cc457f9df981512dea3694f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996656629031079, "res": {"Yes": 0.9996656629031079, "No": 0.0003343106190537813}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9919078838808558, "res": {"Yes": 0.9919078838808558, "No": 0.00809203527858026}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955365519247604, "res": {"Yes": 0.9955365519247604, "No": 0.004463433348591412}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9924110783862757, "res": {"Yes": 0.9924110783862757, "No": 0.007588836447886977}, "ground_truth": 1}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972431543975012, "res": {"Yes": 0.9972431543975012, "No": 0.0027568559420786637}, "ground_truth": 0}, {"key": "81e50c6cdbea4e55af6307d653135ef395b6d7fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995233272745592, "res": {"Yes": 0.9995233272745592, "No": 0.0004765504329843003}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9972819211835804, "res": {"Yes": 0.9972819211835804, "No": 0.0027181067196172227}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987814570271567, "res": {"Yes": 0.9987814570271567, "No": 0.0012185459199146856}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994703413187088, "res": {"Yes": 0.9994703413187088, "No": 0.0005296266148383821}, "ground_truth": 1}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980532679456613, "res": {"Yes": 0.9980532679456613, "No": 0.0019467466194855343}, "ground_truth": 0}, {"key": "59fa6c3439d216272cc845d35627f8eb11b08a6d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998581845233197, "res": {"Yes": 0.998581845233197, "No": 0.0014181379084425307}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9951102240691744, "res": {"Yes": 0.9951102240691744, "No": 0.004889770055578845}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949537154909703, "res": {"Yes": 0.9949537154909703, "No": 0.005046309979060183}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897459172856097, "res": {"Yes": 0.9897459172856097, "No": 0.010253983651246953}, "ground_truth": 1}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996295752242045, "res": {"Yes": 0.9996295752242045, "No": 0.00037040353926011833}, "ground_truth": 0}, {"key": "2eff3ba44cfaefbff89ebc764828ae9e4d477c9b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979737284655261, "res": {"Yes": 0.9979737284655261, "No": 0.0020262286939197976}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.996812405401665, "res": {"Yes": 0.996812405401665, "No": 0.0031875553625668566}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975714061175076, "res": {"Yes": 0.9975714061175076, "No": 0.0024286274903157733}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923574205668734, "res": {"Yes": 0.9923574205668734, "No": 0.00764252143112308}, "ground_truth": 1}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991951022041564, "res": {"Yes": 0.9991951022041564, "No": 0.0008048215356245404}, "ground_truth": 0}, {"key": "74698ee383888faf5a49b32ecb8211b888270b76", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970601435661314, "res": {"Yes": 0.9970601435661314, "No": 0.002939857112509112}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9525277105152451, "res": {"Yes": 0.9525277105152451, "No": 0.047472257082163336}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994643879613674, "res": {"Yes": 0.9994643879613674, "No": 0.0005355067837979189}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995966987678144, "res": {"Yes": 0.9995966987678144, "No": 0.0004032402579843782}, "ground_truth": 1}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995851521808204, "res": {"Yes": 0.995851521808204, "No": 0.004148489445648362}, "ground_truth": 0}, {"key": "79a6d24f452d8c1cef47ab224fd2083b0d08324c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9949041441519813, "res": {"Yes": 0.9949041441519813, "No": 0.005095852527523635}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9996643557411417, "res": {"Yes": 0.9996643557411417, "No": 0.00033563030046717594}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999640459343629, "res": {"Yes": 0.9999640459343629, "No": 3.584799213066337e-05}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999653273603495, "res": {"Yes": 0.999653273603495, "No": 0.00034660169164877567}, "ground_truth": 1}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994983250309899, "res": {"Yes": 0.9994983250309899, "No": 0.0005016573204275161}, "ground_truth": 0}, {"key": "50f2041a2bed7f316d3254192c4158095eeec2e9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998378327705268, "res": {"Yes": 0.9998378327705268, "No": 0.00016213584901047633}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9968776748274883, "res": {"Yes": 0.9968776748274883, "No": 0.0031222949083349796}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970058713705261, "res": {"Yes": 0.9970058713705261, "No": 0.0029940646500941862}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998518850386411, "res": {"Yes": 0.998518850386411, "No": 0.0014811782397392795}, "ground_truth": 1}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999808531883025, "res": {"Yes": 0.9999808531883025, "No": 1.9114506663812234e-05}, "ground_truth": 0}, {"key": "de6ed8db6d639706f2c5015c699fc578e4aa43ec", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942087902795994, "res": {"Yes": 0.9942087902795994, "No": 0.005791204372272765}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9323729811369474, "res": {"Yes": 0.9323729811369474, "No": 0.06762672728733155}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954147524924377, "res": {"Yes": 0.9954147524924377, "No": 0.004585184198159852}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996918756000763, "res": {"Yes": 0.9996918756000763, "No": 0.000308013174978913}, "ground_truth": 1}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976143551845138, "res": {"Yes": 0.9976143551845138, "No": 0.002385592637939546}, "ground_truth": 0}, {"key": "8abb66a9697a8c59348fcc79de126691e394bded", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967441809114166, "res": {"Yes": 0.9967441809114166, "No": 0.0032558496766912186}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9986389066289506, "res": {"Yes": 0.9986389066289506, "No": 0.0013610606411316635}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995735864701022, "res": {"Yes": 0.9995735864701022, "No": 0.0004263292171276973}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997691922901987, "res": {"Yes": 0.9997691922901987, "No": 0.00023077323413677803}, "ground_truth": 1}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999159757201844, "res": {"Yes": 0.999159757201844, "No": 0.000840195210225852}, "ground_truth": 0}, {"key": "11ec3be451b57e4312b44eeabb3ae441435c662e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998741809658787, "res": {"Yes": 0.9998741809658787, "No": 0.0001258035493332698}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9979548501746585, "res": {"Yes": 0.9979548501746585, "No": 0.002045130658175473}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992002202444568, "res": {"Yes": 0.9992002202444568, "No": 0.0007996967217376754}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996784135000271, "res": {"Yes": 0.9996784135000271, "No": 0.00032150643465753414}, "ground_truth": 1}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990345743235264, "res": {"Yes": 0.9990345743235264, "No": 0.0009654263893166215}, "ground_truth": 0}, {"key": "eed527dece78deffed54a7cdd8516c4d57a90011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990567030845182, "res": {"Yes": 0.9990567030845182, "No": 0.0009431985934456664}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9842075661160475, "res": {"Yes": 0.9842075661160475, "No": 0.01579239158788259}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961453780015374, "res": {"Yes": 0.9961453780015374, "No": 0.00385462828785421}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9797061002499122, "res": {"Yes": 0.9797061002499122, "No": 0.02029386612346533}, "ground_truth": 1}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943693180481876, "res": {"Yes": 0.9943693180481876, "No": 0.005630640937836402}, "ground_truth": 0}, {"key": "803c0dc8b3d2eb528fd084b613dfc29d98151d72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9966360587712941, "res": {"Yes": 0.9966360587712941, "No": 0.003363952355572908}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9865963389142465, "res": {"Yes": 0.9865963389142465, "No": 0.013403612752065207}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997800373753263, "res": {"Yes": 0.9997800373753263, "No": 0.0002199441296501313}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996420760700584, "res": {"Yes": 0.9996420760700584, "No": 0.0003578919617250915}, "ground_truth": 1}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996883006083522, "res": {"Yes": 0.9996883006083522, "No": 0.00031157733004502755}, "ground_truth": 0}, {"key": "d4fb4cca753c0f5586bd93bb124b0491711396d9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992836526993255, "res": {"Yes": 0.9992836526993255, "No": 0.0007163100886865589}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9943687286856675, "res": {"Yes": 0.9943687286856675, "No": 0.005631218108112001}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999638075349631, "res": {"Yes": 0.9999638075349631, "No": 3.610352687605043e-05}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998885955719342, "res": {"Yes": 0.9998885955719342, "No": 0.0001113515215258462}, "ground_truth": 1}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999055207534452, "res": {"Yes": 0.9999055207534452, "No": 9.442553721988947e-05}, "ground_truth": 0}, {"key": "723443b1c16e2276a1aa03259a600044e86c2538", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999697675220106, "res": {"Yes": 0.9999697675220106, "No": 3.0192258726563843e-05}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9908356103554776, "res": {"Yes": 0.9908356103554776, "No": 0.009164279638144571}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959909163197712, "res": {"Yes": 0.9959909163197712, "No": 0.0040090539026573346}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992850821256488, "res": {"Yes": 0.9992850821256488, "No": 0.0007148563103584361}, "ground_truth": 1}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995739439876509, "res": {"Yes": 0.9995739439876509, "No": 0.00042596175189917057}, "ground_truth": 0}, {"key": "b30a8f4dbb4f8375a8a9c2ccb4f658cc66dca0f5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995336894065956, "res": {"Yes": 0.9995336894065956, "No": 0.0004663015640427677}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998058955507066, "res": {"Yes": 0.9998058955507066, "No": 0.00019405692773764293}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996881814255202, "res": {"Yes": 0.9996881814255202, "No": 0.00031169263498724315}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998685791264796, "res": {"Yes": 0.9998685791264796, "No": 0.00013131664683006268}, "ground_truth": 1}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999371072378327, "res": {"Yes": 0.9999371072378327, "No": 6.284607972329483e-05}, "ground_truth": 0}, {"key": "f874dc1da36d2c45a65f78dc0dc9dcdf5846c3c9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997351231460697, "res": {"Yes": 0.9997351231460697, "No": 0.0002648020479450156}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9999012298380936, "res": {"Yes": 0.9999012298380936, "No": 9.874093064207319e-05}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992434281948842, "res": {"Yes": 0.9992434281948842, "No": 0.0007564864018348964}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998919329096687, "res": {"Yes": 0.9998919329096687, "No": 0.00010799927973312769}, "ground_truth": 1}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999804955832136, "res": {"Yes": 0.9999804955832136, "No": 1.9381258800982133e-05}, "ground_truth": 0}, {"key": "60fc2801c429858cc0a87e547f1c30e34a7a96f3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999437822452772, "res": {"Yes": 0.9999437822452772, "No": 5.6117098824200496e-05}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9721169605274421, "res": {"Yes": 0.9721169605274421, "No": 0.027882860442072298}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987272447408795, "res": {"Yes": 0.9987272447408795, "No": 0.0012727301009407316}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989248979367744, "res": {"Yes": 0.9989248979367744, "No": 0.0010750210889145532}, "ground_truth": 1}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998095901502569, "res": {"Yes": 0.9998095901502569, "No": 0.0001903601031202034}, "ground_truth": 0}, {"key": "d885b755dab03a11c7e3b19455b82859a6e1f76b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984768934059902, "res": {"Yes": 0.9984768934059902, "No": 0.0015231145416272599}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9869982051901548, "res": {"Yes": 0.9869982051901548, "No": 0.013001724194097893}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9490197350134243, "res": {"Yes": 0.9490197350134243, "No": 0.050979815454475885}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.989619226040419, "res": {"Yes": 0.989619226040419, "No": 0.010380647745126656}, "ground_truth": 1}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986437835938236, "res": {"Yes": 0.9986437835938236, "No": 0.0013562055708938902}, "ground_truth": 0}, {"key": "9ce486aae2ad61c78b2e44d2617276a470c9f600", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972145824940246, "res": {"Yes": 0.9972145824940246, "No": 0.0027854133402742966}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.999634211486937, "res": {"Yes": 0.999634211486937, "No": 0.0003656991343192534}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998416466692125, "res": {"Yes": 0.9998416466692125, "No": 0.00015830753384680834}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997244013642831, "res": {"Yes": 0.9997244013642831, "No": 0.00027550714363440914}, "ground_truth": 1}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998089942439226, "res": {"Yes": 0.9998089942439226, "No": 0.0001909018760279161}, "ground_truth": 0}, {"key": "162e54974cad1d0e067a97e37175951c26f13a0e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986462763116828, "res": {"Yes": 0.9986462763116828, "No": 0.001353655456926433}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9823947781543663, "res": {"Yes": 0.9823947781543663, "No": 0.017605227570304836}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987249827261044, "res": {"Yes": 0.9987249827261044, "No": 0.0012749232477842283}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985005461060271, "res": {"Yes": 0.9985005461060271, "No": 0.0014994738486109722}, "ground_truth": 1}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994845984750605, "res": {"Yes": 0.994845984750605, "No": 0.005153985397460306}, "ground_truth": 0}, {"key": "03a0cef9c902928979ef731616fb96bd1fb10f9f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9911651833904265, "res": {"Yes": 0.9911651833904265, "No": 0.008834786842385981}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9910624792197493, "res": {"Yes": 0.9910624792197493, "No": 0.008937387128685702}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978683151741183, "res": {"Yes": 0.9978683151741183, "No": 0.0021317102226454205}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9936065625538562, "res": {"Yes": 0.9936065625538562, "No": 0.006393383189987472}, "ground_truth": 1}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9852042927580174, "res": {"Yes": 0.9852042927580174, "No": 0.014795631658341463}, "ground_truth": 0}, {"key": "6f64897fc65e1ba7531a4830a7d935ab15d8bec6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9943879367936069, "res": {"Yes": 0.9943879367936069, "No": 0.005612048918568876}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9092351894519729, "res": {"Yes": 0.9092351894519729, "No": 0.09076475767953372}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.998700491293656, "res": {"Yes": 0.998700491293656, "No": 0.0012995217068769006}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983409535416113, "res": {"Yes": 0.9983409535416113, "No": 0.0016590513763322847}, "ground_truth": 1}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995064232091205, "res": {"Yes": 0.9995064232091205, "No": 0.0004935072540545931}, "ground_truth": 0}, {"key": "0d7383a44955b41c5a472151a35a842751f0004c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930257443919228, "res": {"Yes": 0.9930257443919228, "No": 0.0069742603493612824}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9952188319469295, "res": {"Yes": 0.9952188319469295, "No": 0.004781141277473112}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9855437752949232, "res": {"Yes": 0.9855437752949232, "No": 0.014456111683037995}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971687118777005, "res": {"Yes": 0.9971687118777005, "No": 0.00283132918758249}, "ground_truth": 1}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962823855032937, "res": {"Yes": 0.9962823855032937, "No": 0.00371763710255252}, "ground_truth": 0}, {"key": "2fddefe691f7849048548a15f6533339d4a25bfd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981246479742106, "res": {"Yes": 0.9981246479742106, "No": 0.0018752681808516516}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9917942475043837, "res": {"Yes": 0.9917942475043837, "No": 0.008205710209916346}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9945277635338567, "res": {"Yes": 0.9945277635338567, "No": 0.0054721789300032206}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989134777931592, "res": {"Yes": 0.9989134777931592, "No": 0.0010865073428827665}, "ground_truth": 1}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996194467087797, "res": {"Yes": 0.9996194467087797, "No": 0.0003804324218438652}, "ground_truth": 0}, {"key": "e35ec8afe496d466d673ac67d80ec5c21b163410", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942361292099513, "res": {"Yes": 0.9942361292099513, "No": 0.005763790199088093}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.991114472712275, "res": {"Yes": 0.991114472712275, "No": 0.008885433752968774}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9826854793525195, "res": {"Yes": 0.9826854793525195, "No": 0.017314482308766244}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9861896895223573, "res": {"Yes": 0.9861896895223573, "No": 0.013810163118619875}, "ground_truth": 1}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943167451434334, "res": {"Yes": 0.9943167451434334, "No": 0.005683275726908048}, "ground_truth": 0}, {"key": "b4aa207f0723ae8a731dbc07ecdb03a68a42031f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9814420659885809, "res": {"Yes": 0.9814420659885809, "No": 0.01855788523863207}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9999855020530962, "res": {"Yes": 0.9999855020530962, "No": 1.4404152858617873e-05}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999250684975053, "res": {"Yes": 0.9999250684975053, "No": 7.48456174436399e-05}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998297282578789, "res": {"Yes": 0.9998297282578789, "No": 0.00017020031347890382}, "ground_truth": 1}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999882437011058, "res": {"Yes": 0.9999882437011058, "No": 1.1729799363571906e-05}, "ground_truth": 0}, {"key": "a65ecd71682863f7b12ca11ab8ac23b39ae302c4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995696618220472, "res": {"Yes": 0.9995696618220472, "No": 0.00043024514804330515}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.999131909308925, "res": {"Yes": 0.999131909308925, "No": 0.0008680597580447532}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978513348928342, "res": {"Yes": 0.9978513348928342, "No": 0.002148580902751886}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996057472880334, "res": {"Yes": 0.9996057472880334, "No": 0.00039415985799543744}, "ground_truth": 1}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999081429891136, "res": {"Yes": 0.9999081429891136, "No": 9.178350138178522e-05}, "ground_truth": 0}, {"key": "215a2a64c7bd36360f76ec5e159690de07d7d959", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999486693344637, "res": {"Yes": 0.9999486693344637, "No": 5.125184195531969e-05}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.997941790324376, "res": {"Yes": 0.997941790324376, "No": 0.002058205072568342}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988609069595689, "res": {"Yes": 0.9988609069595689, "No": 0.0011390029551588015}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998502243374946, "res": {"Yes": 0.9998502243374946, "No": 0.00014965992566570157}, "ground_truth": 1}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999628860289388, "res": {"Yes": 0.999628860289388, "No": 0.00037111892283347726}, "ground_truth": 0}, {"key": "39074eb06df4966d1e06f76ad2f0713b31335fe5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998515353819624, "res": {"Yes": 0.9998515353819624, "No": 0.00014838667831905045}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.999253899271669, "res": {"Yes": 0.999253899271669, "No": 0.0007460824060404526}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980285842976736, "res": {"Yes": 0.9980285842976736, "No": 0.0019713375642528242}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9912195269980749, "res": {"Yes": 0.9912195269980749, "No": 0.00878043788124105}, "ground_truth": 1}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977989973663624, "res": {"Yes": 0.9977989973663624, "No": 0.00220101205367263}, "ground_truth": 0}, {"key": "3cae0d9bcf97b5d182b20a31aaea3f41cd7bf599", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993171071775119, "res": {"Yes": 0.9993171071775119, "No": 0.0006828791186011661}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9997133185409849, "res": {"Yes": 0.9997133185409849, "No": 0.00028665819790874643}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995472650753987, "res": {"Yes": 0.9995472650753987, "No": 0.0004526614784636198}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99951713165846, "res": {"Yes": 0.99951713165846, "No": 0.0004827813009102273}, "ground_truth": 1}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998455797840291, "res": {"Yes": 0.9998455797840291, "No": 0.00015435105066651173}, "ground_truth": 0}, {"key": "6a9ead70f9423e494c466189bc6d907071547f42", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995718065409973, "res": {"Yes": 0.9995718065409973, "No": 0.0004281619697393964}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9878563396689497, "res": {"Yes": 0.9878563396689497, "No": 0.01214350645757497}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967822009481853, "res": {"Yes": 0.9967822009481853, "No": 0.003217819573123919}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975117440537069, "res": {"Yes": 0.9975117440537069, "No": 0.0024882814189282655}, "ground_truth": 1}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998259143946578, "res": {"Yes": 0.9998259143946578, "No": 0.0001739675100561565}, "ground_truth": 0}, {"key": "9a15663058028878027f6aa039fb3185c2ff52c8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976058149438586, "res": {"Yes": 0.9976058149438586, "No": 0.0023941419010055013}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9972050947404572, "res": {"Yes": 0.9972050947404572, "No": 0.0027948802993516}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959166622878765, "res": {"Yes": 0.9959166622878765, "No": 0.004083348158553248}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998908601961818, "res": {"Yes": 0.9998908601961818, "No": 0.00010902580070820947}, "ground_truth": 1}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991703464130128, "res": {"Yes": 0.9991703464130128, "No": 0.0008296428093347079}, "ground_truth": 0}, {"key": "83cb0c7e79d4c71703e1bd5fc346fe68be8b8b13", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9966455338353445, "res": {"Yes": 0.9966455338353445, "No": 0.0033544505247146447}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9664855744512265, "res": {"Yes": 0.9664855744512265, "No": 0.03351442284892635}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986687424106097, "res": {"Yes": 0.9986687424106097, "No": 0.0013311947922191119}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992431899253172, "res": {"Yes": 0.9992431899253172, "No": 0.0007567428941981179}, "ground_truth": 1}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990351697483101, "res": {"Yes": 0.9990351697483101, "No": 0.0009647351020080613}, "ground_truth": 0}, {"key": "5eb63517219a5d2aeebd21aa66d9b69c52cfb208", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988904068542298, "res": {"Yes": 0.9988904068542298, "No": 0.0011095097809420212}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9312656537813566, "res": {"Yes": 0.9312656537813566, "No": 0.06873428158957284}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973338491102632, "res": {"Yes": 0.9973338491102632, "No": 0.0026661201252697383}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931416654637335, "res": {"Yes": 0.9931416654637335, "No": 0.006858320447134555}, "ground_truth": 1}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938596628982334, "res": {"Yes": 0.9938596628982334, "No": 0.0061402943391485314}, "ground_truth": 0}, {"key": "4a1de02b50578df33ea7b0cb384bdd6aaf4ee119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974083258014422, "res": {"Yes": 0.9974083258014422, "No": 0.002591709195189929}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9984098732932855, "res": {"Yes": 0.9984098732932855, "No": 0.0015901540192317374}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992117664594001, "res": {"Yes": 0.9992117664594001, "No": 0.0007881970783083121}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909106392684525, "res": {"Yes": 0.9909106392684525, "No": 0.009089282561819881}, "ground_truth": 1}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999692709783313, "res": {"Yes": 0.999692709783313, "No": 0.00030725762281900445}, "ground_truth": 0}, {"key": "ddedfc2a349fb607ed7efeda1660e9807454ddb7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998982949846451, "res": {"Yes": 0.998982949846451, "No": 0.0010170225389180337}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9988555561759083, "res": {"Yes": 0.9988555561759083, "No": 0.001144349414962571}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989084802415312, "res": {"Yes": 0.9989084802415312, "No": 0.001091495396034632}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997148677379322, "res": {"Yes": 0.9997148677379322, "No": 0.00028506224443940393}, "ground_truth": 1}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998399780848727, "res": {"Yes": 0.9998399780848727, "No": 0.00015999323897972355}, "ground_truth": 0}, {"key": "1cce8238a161fda411022c350d2b9a49072f366f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999685755197594, "res": {"Yes": 0.9999685755197594, "No": 3.1363962977818184e-05}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.996504527238619, "res": {"Yes": 0.996504527238619, "No": 0.0034954026587141504}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982819119060301, "res": {"Yes": 0.9982819119060301, "No": 0.001718031706704265}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9880425078698645, "res": {"Yes": 0.9880425078698645, "No": 0.011957399347089077}, "ground_truth": 1}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910090931129972, "res": {"Yes": 0.9910090931129972, "No": 0.008990795025334217}, "ground_truth": 0}, {"key": "0612042c0c7d5cf6459f5435402919673c8c552f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998554738697486, "res": {"Yes": 0.998554738697486, "No": 0.0014451588972044396}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9970021978094603, "res": {"Yes": 0.9970021978094603, "No": 0.002997791883970452}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9674829760954348, "res": {"Yes": 0.9674829760954348, "No": 0.032516936575016496}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9934625258770954, "res": {"Yes": 0.9934625258770954, "No": 0.006537411414801973}, "ground_truth": 1}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983653134579028, "res": {"Yes": 0.9983653134579028, "No": 0.0016346352307209776}, "ground_truth": 0}, {"key": "cbe1f7ee7f5d619093684cc123e3908e39d79c92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992604505016333, "res": {"Yes": 0.9992604505016333, "No": 0.0007395216497809295}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9746911601338928, "res": {"Yes": 0.9746911601338928, "No": 0.02530875235975122}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9811601411273712, "res": {"Yes": 0.9811601411273712, "No": 0.01883982685806666}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9942503890444707, "res": {"Yes": 0.9942503890444707, "No": 0.005749643075072367}, "ground_truth": 1}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9955052393082727, "res": {"Yes": 0.9955052393082727, "No": 0.0044947824969706115}, "ground_truth": 0}, {"key": "42be47d5986a9a55b413eb57359d2d8c51b59024", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9867444223513754, "res": {"Yes": 0.9867444223513754, "No": 0.013255509521934172}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9919933999456423, "res": {"Yes": 0.9919933999456423, "No": 0.008006504541302042}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9939060618227128, "res": {"Yes": 0.9939060618227128, "No": 0.00609388786179483}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993157967738484, "res": {"Yes": 0.9993157967738484, "No": 0.0006841404672150381}, "ground_truth": 1}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971786637708223, "res": {"Yes": 0.9971786637708223, "No": 0.0028212977636943365}, "ground_truth": 0}, {"key": "9f997964709283520a65d2ce75de7c8b154ce351", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977929421417403, "res": {"Yes": 0.9977929421417403, "No": 0.002207052199935841}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9209956922414849, "res": {"Yes": 0.9209956922414849, "No": 0.0790041023012023}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974769926466598, "res": {"Yes": 0.9974769926466598, "No": 0.0025230146569650484}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993434297338516, "res": {"Yes": 0.993434297338516, "No": 0.006565666192406842}, "ground_truth": 1}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966030232254, "res": {"Yes": 0.9966030232254, "No": 0.0033969306672706847}, "ground_truth": 0}, {"key": "57da930302d81b46438b0f715a5218b7174a5611", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997149869239459, "res": {"Yes": 0.9997149869239459, "No": 0.00028497417381348467}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9978202368416522, "res": {"Yes": 0.9978202368416522, "No": 0.0021797610898471385}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9951207304983835, "res": {"Yes": 0.9951207304983835, "No": 0.00487927468639832}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991245324854968, "res": {"Yes": 0.9991245324854968, "No": 0.0008754688382162765}, "ground_truth": 1}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994968953496083, "res": {"Yes": 0.9994968953496083, "No": 0.0005030439580165314}, "ground_truth": 0}, {"key": "bc639974d283826b3b928bf90f0951652db71fd4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996991411471042, "res": {"Yes": 0.9996991411471042, "No": 0.00030074224168524864}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9993484184715007, "res": {"Yes": 0.9993484184715007, "No": 0.0006514916357053854}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981340329851474, "res": {"Yes": 0.9981340329851474, "No": 0.0018659947314627722}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995852635469492, "res": {"Yes": 0.9995852635469492, "No": 0.0004146762345192389}, "ground_truth": 1}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999393719679535, "res": {"Yes": 0.9999393719679535, "No": 6.057689549783269e-05}, "ground_truth": 0}, {"key": "304002bc095b57d14b728ccfb8d704ef39f28813", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994447374153582, "res": {"Yes": 0.9994447374153582, "No": 0.0005552583816673631}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9910123713764996, "res": {"Yes": 0.9910123713764996, "No": 0.008987531959022386}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993938926573103, "res": {"Yes": 0.9993938926573103, "No": 0.0006060443437906852}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992701997338511, "res": {"Yes": 0.9992701997338511, "No": 0.0007297654330840699}, "ground_truth": 1}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972826344398654, "res": {"Yes": 0.9972826344398654, "No": 0.0027173522779132117}, "ground_truth": 0}, {"key": "fab941df6ecc8251b49d28715504baf4ac31dcf9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9960527744556635, "res": {"Yes": 0.9960527744556635, "No": 0.003947221757773106}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9977708710064115, "res": {"Yes": 0.9977708710064115, "No": 0.002229146950729044}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998187634553272, "res": {"Yes": 0.9998187634553272, "No": 0.00018118988286364878}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991802248294164, "res": {"Yes": 0.9991802248294164, "No": 0.0008197195404917506}, "ground_truth": 1}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991584470045134, "res": {"Yes": 0.9991584470045134, "No": 0.0008415220007441633}, "ground_truth": 0}, {"key": "92dec9732f4244771770150d82cd6086f40e5621", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996020607688274, "res": {"Yes": 0.9996020607688274, "No": 0.00039781996669743867}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9959889052169434, "res": {"Yes": 0.9959889052169434, "No": 0.004011050908714091}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936743609279075, "res": {"Yes": 0.9936743609279075, "No": 0.006325599919080125}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997522542974799, "res": {"Yes": 0.997522542974799, "No": 0.00247745464175094}, "ground_truth": 1}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974582537211574, "res": {"Yes": 0.9974582537211574, "No": 0.0025417470993794396}, "ground_truth": 0}, {"key": "d2ed554726c632af6e2130a580964a261f8c2e60", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998262719524653, "res": {"Yes": 0.9998262719524653, "No": 0.0001736395428990112}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.988543740482909, "res": {"Yes": 0.988543740482909, "No": 0.011456148571122019}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992416414692925, "res": {"Yes": 0.9992416414692925, "No": 0.0007583655747479315}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973974168072778, "res": {"Yes": 0.9973974168072778, "No": 0.002602559227003003}, "ground_truth": 1}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991818923327096, "res": {"Yes": 0.9991818923327096, "No": 0.0008180656237059055}, "ground_truth": 0}, {"key": "84ab5256295369509813877f5000bb7e01e7f641", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997383407987415, "res": {"Yes": 0.9997383407987415, "No": 0.00026165309406775986}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9978465879257549, "res": {"Yes": 0.9978465879257549, "No": 0.0021534447709060994}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986524590497831, "res": {"Yes": 0.9986524590497831, "No": 0.0013475016913576231}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991353632139922, "res": {"Yes": 0.9991353632139922, "No": 0.0008646156370642037}, "ground_truth": 1}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980930547349305, "res": {"Yes": 0.9980930547349305, "No": 0.0019069713043737726}, "ground_truth": 0}, {"key": "0e3f2fa30b711144fd8e49dedd6c248542a9083c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999125961624259, "res": {"Yes": 0.999125961624259, "No": 0.0008739984113392816}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9994356903126376, "res": {"Yes": 0.9994356903126376, "No": 0.0005642942627849592}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999751315392253, "res": {"Yes": 0.9999751315392253, "No": 2.4743802607544103e-05}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999956774797236, "res": {"Yes": 0.999956774797236, "No": 4.320469016383773e-05}, "ground_truth": 1}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998704861276457, "res": {"Yes": 0.9998704861276457, "No": 0.0001294147752046031}, "ground_truth": 0}, {"key": "67ab697c3e2d707e3b7d070199b065b1541c8925", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 3.1704564179924958e-06}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9977531858740102, "res": {"Yes": 0.9977531858740102, "No": 0.0022467364104069953}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9942499297008971, "res": {"Yes": 0.9942499297008971, "No": 0.005750013939010941}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998999187284646, "res": {"Yes": 0.9998999187284646, "No": 0.00010002984164935287}, "ground_truth": 1}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998601168145652, "res": {"Yes": 0.9998601168145652, "No": 0.00013980653275003655}, "ground_truth": 0}, {"key": "71537a701b69838dcb68ae310c7b3bfb375c1344", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9954320001932792, "res": {"Yes": 0.9954320001932792, "No": 0.004567979474324772}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9759794208363485, "res": {"Yes": 0.9759794208363485, "No": 0.0240204410672233}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999737011318213, "res": {"Yes": 0.9999737011318213, "No": 2.6272547112064195e-05}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993469889445845, "res": {"Yes": 0.9993469889445845, "No": 0.0006529163094966369}, "ground_truth": 1}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997875419124167, "res": {"Yes": 0.9997875419124167, "No": 0.000212324137600917}, "ground_truth": 0}, {"key": "83989da5142972bb45c7f1e25ea60c6b101a9987", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996377862852326, "res": {"Yes": 0.9996377862852326, "No": 0.0003620884641096298}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9961660815045303, "res": {"Yes": 0.9961660815045303, "No": 0.0038339410885838013}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994965415777448, "res": {"Yes": 0.9994965415777448, "No": 0.000503441167350636}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985868336612648, "res": {"Yes": 0.9985868336612648, "No": 0.0014131867760135964}, "ground_truth": 1}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978920541449822, "res": {"Yes": 0.9978920541449822, "No": 0.0021079541479214507}, "ground_truth": 0}, {"key": "cc432e8a26891d7fcb62b112f868e994532a7ba7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988548418945554, "res": {"Yes": 0.9988548418945554, "No": 0.001145169062542501}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9528701552848922, "res": {"Yes": 0.9528701552848922, "No": 0.047129817986482886}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966656687701292, "res": {"Yes": 0.9966656687701292, "No": 0.0033343233958584655}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9439270852699032, "res": {"Yes": 0.9439270852699032, "No": 0.05607276817955675}, "ground_truth": 1}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978095502441424, "res": {"Yes": 0.9978095502441424, "No": 0.0021904230068340936}, "ground_truth": 0}, {"key": "ef024e4ca04bd445d2915e7fe5c2c3c9b6f723a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9880613490320875, "res": {"Yes": 0.9880613490320875, "No": 0.011938576490814572}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9978256972282747, "res": {"Yes": 0.9978256972282747, "No": 0.002174307621637724}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978082454094468, "res": {"Yes": 0.9978082454094468, "No": 0.002191775134027548}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9939340810258764, "res": {"Yes": 0.9939340810258764, "No": 0.006065877890924772}, "ground_truth": 1}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994691498908039, "res": {"Yes": 0.9994691498908039, "No": 0.0005308070360693357}, "ground_truth": 0}, {"key": "dfc606b97c43ac249d72cff2fe812213987a37a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996786518433829, "res": {"Yes": 0.9996786518433829, "No": 0.00032133849142441636}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8893609036407147, "res": {"Yes": 0.8893609036407147, "No": 0.11063880416792268}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993850879359619, "res": {"Yes": 0.9993850879359619, "No": 0.0006148972360336943}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980816475945942, "res": {"Yes": 0.9980816475945942, "No": 0.0019182652825792703}, "ground_truth": 1}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.961716450272794, "res": {"Yes": 0.961716450272794, "No": 0.03828347262807467}, "ground_truth": 0}, {"key": "087ac92e2ca469b5ca6277950e63b2f0706d0de2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991497561623071, "res": {"Yes": 0.9991497561623071, "No": 0.0008501361881168793}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.996630493471417, "res": {"Yes": 0.996630493471417, "No": 0.0033695212736682355}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975901544511756, "res": {"Yes": 0.9975901544511756, "No": 0.0024097723416192174}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994770095372134, "res": {"Yes": 0.9994770095372134, "No": 0.0005229468104142591}, "ground_truth": 1}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996656629031079, "res": {"Yes": 0.9996656629031079, "No": 0.0003343092818139794}, "ground_truth": 0}, {"key": "a1abb9ce1c76a6c390233d8ec934cfb34bc35215", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989269186639943, "res": {"Yes": 0.9989269186639943, "No": 0.0010730934788245872}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9195806059261348, "res": {"Yes": 0.9195806059261348, "No": 0.08041922616331931}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9485836022044991, "res": {"Yes": 0.9485836022044991, "No": 0.05141621959373956}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9825624351807514, "res": {"Yes": 0.9825624351807514, "No": 0.01743758055098318}, "ground_truth": 1}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998672905369939, "res": {"Yes": 0.998672905369939, "No": 0.0013270935881598022}, "ground_truth": 0}, {"key": "99ec57979392a7f9c1a24158d5e1262cdf113f45", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997320246718041, "res": {"Yes": 0.9997320246718041, "No": 0.0002679205705304405}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9954421646013274, "res": {"Yes": 0.9954421646013274, "No": 0.004557879434101702}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968959201487215, "res": {"Yes": 0.9968959201487215, "No": 0.0031041107423317024}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955501374079045, "res": {"Yes": 0.9955501374079045, "No": 0.004449858415730168}, "ground_truth": 1}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994932055137307, "res": {"Yes": 0.9994932055137307, "No": 0.0005067941259774043}, "ground_truth": 0}, {"key": "ca5c0c816376354b82a4a7b9fa9a31b655e2d2cd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991400045082751, "res": {"Yes": 0.9991400045082751, "No": 0.0008599881051786403}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9977953173906663, "res": {"Yes": 0.9977953173906663, "No": 0.002204659287859526}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987827666302617, "res": {"Yes": 0.9987827666302617, "No": 0.0012171726619237507}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9926123567843771, "res": {"Yes": 0.9926123567843771, "No": 0.007387639597365216}, "ground_truth": 1}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996460083698273, "res": {"Yes": 0.9996460083698273, "No": 0.00035387896856483875}, "ground_truth": 0}, {"key": "e9787bf9462b12b7dfc0b562510a551e7d07b4d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996669737055711, "res": {"Yes": 0.9996669737055711, "No": 0.00033293176463065996}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9931007443017406, "res": {"Yes": 0.9931007443017406, "No": 0.006899184933058514}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994584346594764, "res": {"Yes": 0.9994584346594764, "No": 0.0005414544030296813}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9942076159208699, "res": {"Yes": 0.9942076159208699, "No": 0.005792323341046391}, "ground_truth": 1}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.992342037260611, "res": {"Yes": 0.992342037260611, "No": 0.007657926682100204}, "ground_truth": 0}, {"key": "ed8a7d5b06b4b5415725e624a15ab5ada7146894", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9955195363594304, "res": {"Yes": 0.9955195363594304, "No": 0.004480466986184649}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9949534780950422, "res": {"Yes": 0.9949534780950422, "No": 0.005046521928530209}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9942243502660322, "res": {"Yes": 0.9942243502660322, "No": 0.005775585167511303}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997767310168081, "res": {"Yes": 0.997767310168081, "No": 0.002232703714233239}, "ground_truth": 1}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989002788370135, "res": {"Yes": 0.9989002788370135, "No": 0.0010996228737577376}, "ground_truth": 0}, {"key": "4b6f2532f6bb4cf4ae341e4fb72d384ef58f0f42", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978602413544565, "res": {"Yes": 0.9978602413544565, "No": 0.0021397747767421143}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9947538447908849, "res": {"Yes": 0.9947538447908849, "No": 0.005246159467905194}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994074664173995, "res": {"Yes": 0.9994074664173995, "No": 0.0005924719664990389}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982713356643395, "res": {"Yes": 0.9982713356643395, "No": 0.0017286170209956189}, "ground_truth": 1}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954362536833036, "res": {"Yes": 0.9954362536833036, "No": 0.004563748288568638}, "ground_truth": 0}, {"key": "46102a68e2927f930c1c2bd117ca714d188cd1ab", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945175040383667, "res": {"Yes": 0.9945175040383667, "No": 0.0054825294831672795}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.13459096349779598, "res": {"No": 0.8654088108478489, "Yes": 0.13459096349779598}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.992914900802976, "res": {"Yes": 0.992914900802976, "No": 0.007085080810833666}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980205089816595, "res": {"Yes": 0.9980205089816595, "No": 0.0019794675194785864}, "ground_truth": 1}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999544643806115, "res": {"Yes": 0.999544643806115, "No": 0.0004552353591025039}, "ground_truth": 0}, {"key": "ee6568a8880e61a498aab80c41f70939d567c991", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977752642012281, "res": {"Yes": 0.9977752642012281, "No": 0.0022247814342205123}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9987627979662713, "res": {"Yes": 0.9987627979662713, "No": 0.0012372167235351634}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993448447080114, "res": {"Yes": 0.9993448447080114, "No": 0.000655118121775186}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993005566246383, "res": {"Yes": 0.9993005566246383, "No": 0.0006993258908411178}, "ground_truth": 1}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992899659335721, "res": {"Yes": 0.9992899659335721, "No": 0.0007100382614604342}, "ground_truth": 0}, {"key": "8f462be265750ebbf85a6ad8ef28cb39c59f826e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999570495863321, "res": {"Yes": 0.999570495863321, "No": 0.0004293978841451264}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9921563700707858, "res": {"Yes": 0.9921563700707858, "No": 0.007843626404260333}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.8681912973080876, "res": {"Yes": 0.8681912973080876, "No": 0.1318083507057643}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9839913417530978, "res": {"Yes": 0.9839913417530978, "No": 0.016008523177213994}, "ground_truth": 1}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9689230548536822, "res": {"Yes": 0.9689230548536822, "No": 0.031076791263890284}, "ground_truth": 0}, {"key": "b21258342db561f79656c3c75f2e8c8244dd6178", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978184562330283, "res": {"Yes": 0.9978184562330283, "No": 0.002181562316311471}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.989903246933131, "res": {"Yes": 0.989903246933131, "No": 0.010096650942242326}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990294572515706, "res": {"Yes": 0.9990294572515706, "No": 0.0009705025770256516}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985778017837758, "res": {"Yes": 0.9985778017837758, "No": 0.001422216512570089}, "ground_truth": 1}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979288569827882, "res": {"Yes": 0.9979288569827882, "No": 0.0020711029241512234}, "ground_truth": 0}, {"key": "c7b97c7c2ca7b9bedff4978dd3cae9aaef8f8100", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968664224348651, "res": {"Yes": 0.9968664224348651, "No": 0.0031336068175072235}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9978511004975831, "res": {"Yes": 0.9978511004975831, "No": 0.002148884303794964}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998145957494927, "res": {"Yes": 0.9998145957494927, "No": 0.00018531620050005098}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986210724954194, "res": {"Yes": 0.9986210724954194, "No": 0.0013788453954230524}, "ground_truth": 1}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997368001098783, "res": {"Yes": 0.997368001098783, "No": 0.0026319341100709457}, "ground_truth": 0}, {"key": "bdbeb9265050f2a4ce200c3802775694b7dc20e6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997191578835323, "res": {"Yes": 0.9997191578835323, "No": 0.000280775539401631}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.99709509642122, "res": {"Yes": 0.99709509642122, "No": 0.002904832353447952}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989287046470288, "res": {"Yes": 0.9989287046470288, "No": 0.0010712222421856406}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985310920030002, "res": {"Yes": 0.9985310920030002, "No": 0.0014689237341299038}, "ground_truth": 1}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990858552451294, "res": {"Yes": 0.9990858552451294, "No": 0.0009140840795372854}, "ground_truth": 0}, {"key": "42e93d5aef7547b8169f1f6c7735d0265a030580", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997012861838587, "res": {"Yes": 0.9997012861838587, "No": 0.0002986521618712511}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8650873538839858, "res": {"Yes": 0.8650873538839858, "No": 0.13491250793699333}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995152253112383, "res": {"Yes": 0.9995152253112383, "No": 0.00048473834861220125}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976383260504623, "res": {"Yes": 0.9976383260504623, "No": 0.0023616907428181777}, "ground_truth": 1}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963714873200558, "res": {"Yes": 0.9963714873200558, "No": 0.003628507036561232}, "ground_truth": 0}, {"key": "6e7a607934780f0011aa4e84509314efe15c2685", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952371368766779, "res": {"Yes": 0.9952371368766779, "No": 0.004762807374664525}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9772192897260897, "res": {"Yes": 0.9772192897260897, "No": 0.02278063149229219}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977199576543219, "res": {"Yes": 0.9977199576543219, "No": 0.0022799964341187846}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9946375495457047, "res": {"Yes": 0.9946375495457047, "No": 0.005362449564466092}, "ground_truth": 1}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9942245871897231, "res": {"Yes": 0.9942245871897231, "No": 0.005775368009591601}, "ground_truth": 0}, {"key": "c087e2af172aed145b177c9a1a68029205bc732a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9964116164916076, "res": {"Yes": 0.9964116164916076, "No": 0.0035884156392829653}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9987895456915394, "res": {"Yes": 0.9987895456915394, "No": 0.001210388374757078}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999843100330889, "res": {"Yes": 0.9999843100330889, "No": 1.561956910563716e-05}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996331500459057, "res": {"Yes": 0.9996331500459057, "No": 0.0003667423500199648}, "ground_truth": 1}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999942038320978, "res": {"Yes": 0.9999942038320978, "No": 5.699500605683492e-06}, "ground_truth": 0}, {"key": "40e79de5b7a27d7161b147725936d1d47e45c8f9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999932458601023, "res": {"Yes": 0.999932458601023, "No": 6.749574493991155e-05}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9931633025816954, "res": {"Yes": 0.9931633025816954, "No": 0.006836687146383268}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995678744931329, "res": {"Yes": 0.9995678744931329, "No": 0.0004320933089947067}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955818070583748, "res": {"Yes": 0.9955818070583748, "No": 0.004418217256003876}, "ground_truth": 1}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9903233802240079, "res": {"Yes": 0.9903233802240079, "No": 0.009676548511550013}, "ground_truth": 0}, {"key": "e878909ace599352d3d578796968bbd52cb299bd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990511168745806, "res": {"Yes": 0.9990511168745806, "No": 0.0009488479438103908}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9972559561897673, "res": {"Yes": 0.9972559561897673, "No": 0.0027439757681681546}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991603527012367, "res": {"Yes": 0.9991603527012367, "No": 0.0008395454026021948}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996736469047605, "res": {"Yes": 0.9996736469047605, "No": 0.0003263027260406864}, "ground_truth": 1}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997595390553684, "res": {"Yes": 0.9997595390553684, "No": 0.00024034143494971636}, "ground_truth": 0}, {"key": "470708aab0fe600a27423bf4a2f8c9860fe5f64b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992137913241281, "res": {"Yes": 0.9992137913241281, "No": 0.0007861479906408948}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9779086335885467, "res": {"Yes": 0.9779086335885467, "No": 0.022091353309094442}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982077838408528, "res": {"Yes": 0.9982077838408528, "No": 0.0017921318323065065}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994841543941172, "res": {"Yes": 0.9994841543941172, "No": 0.0005158298473814277}, "ground_truth": 1}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991313138264846, "res": {"Yes": 0.9991313138264846, "No": 0.0008685920418047387}, "ground_truth": 0}, {"key": "16e8cb520c874af6825d4661f6127a67efd4b6f6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992075976167174, "res": {"Yes": 0.9992075976167174, "No": 0.0007923753909261815}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9926679211065723, "res": {"Yes": 0.9926679211065723, "No": 0.007331987161713541}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980997062488293, "res": {"Yes": 0.9980997062488293, "No": 0.0019003180729593304}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995426182309467, "res": {"Yes": 0.9995426182309467, "No": 0.00045728656806167815}, "ground_truth": 1}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987078614308662, "res": {"Yes": 0.9987078614308662, "No": 0.0012921259420571854}, "ground_truth": 0}, {"key": "38673348ff28676905791fe3e8db2bbda814d974", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990718168287361, "res": {"Yes": 0.9990718168287361, "No": 0.0009281789590185999}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9832808683107742, "res": {"Yes": 0.9832808683107742, "No": 0.016719074930155394}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987447235194636, "res": {"Yes": 0.9987447235194636, "No": 0.00125521601211798}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964208407152022, "res": {"Yes": 0.9964208407152022, "No": 0.0035790971611413505}, "ground_truth": 1}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997820474622243, "res": {"Yes": 0.997820474622243, "No": 0.0021794864572520353}, "ground_truth": 0}, {"key": "914a0163e25ecccb635b601837cc4dd552c66ce0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983698321693741, "res": {"Yes": 0.9983698321693741, "No": 0.0016301846310422922}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.982309651440894, "res": {"Yes": 0.982309651440894, "No": 0.017690349749094145}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960058213357607, "res": {"Yes": 0.9960058213357607, "No": 0.003994186300434989}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980314357692153, "res": {"Yes": 0.9980314357692153, "No": 0.001968562692960482}, "ground_truth": 1}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9953363358117344, "res": {"Yes": 0.9953363358117344, "No": 0.004663620039089851}, "ground_truth": 0}, {"key": "851c7314af56b6f47742970e11696948323dfe94", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.974178054063558, "res": {"Yes": 0.974178054063558, "No": 0.025821804714881443}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.962698298138727, "res": {"Yes": 0.962698298138727, "No": 0.03730153501358177}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9690550205015306, "res": {"Yes": 0.9690550205015306, "No": 0.030944855913942134}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8958992827235788, "res": {"Yes": 0.8958992827235788, "No": 0.10410067700210376}, "ground_truth": 1}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979572258090069, "res": {"Yes": 0.9979572258090069, "No": 0.0020427807225770946}, "ground_truth": 0}, {"key": "f29f04e8f0615b768dd756c4387e87e27b7b4c2d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9857150392459291, "res": {"Yes": 0.9857150392459291, "No": 0.014284884910495557}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9955763731877009, "res": {"Yes": 0.9955763731877009, "No": 0.004423588652432466}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993636557529338, "res": {"Yes": 0.9993636557529338, "No": 0.0006363075769308652}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999200623291858, "res": {"Yes": 0.9999200623291858, "No": 7.990916547049783e-05}, "ground_truth": 1}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996218298743548, "res": {"Yes": 0.9996218298743548, "No": 0.00037814013633631865}, "ground_truth": 0}, {"key": "8451164e86012e6dbf2fd39f7e6ca784ff9f3624", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999152945777069, "res": {"Yes": 0.9999152945777069, "No": 8.468271926221337e-05}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.99583059450712, "res": {"Yes": 0.99583059450712, "No": 0.004169375560783037}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.983692949319539, "res": {"Yes": 0.983692949319539, "No": 0.016306992099309286}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967042687842691, "res": {"Yes": 0.9967042687842691, "No": 0.003295781373468632}, "ground_truth": 1}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996078921243943, "res": {"Yes": 0.9996078921243943, "No": 0.0003920612815137165}, "ground_truth": 0}, {"key": "18765fda59852b1c13eb226a2da9ab0979739f44", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987654172251434, "res": {"Yes": 0.9987654172251434, "No": 0.0012345230226262006}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9979301544907315, "res": {"Yes": 0.9979301544907315, "No": 0.002069818203412619}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.987642223282544, "res": {"Yes": 0.987642223282544, "No": 0.01235768181455539}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9880301653199479, "res": {"Yes": 0.9880301653199479, "No": 0.011969757722616619}, "ground_truth": 1}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9959843002728856, "res": {"Yes": 0.9959843002728856, "No": 0.004015642176658195}, "ground_truth": 0}, {"key": "fba410776268aaf447802e66ac94e4af54f7cb7a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9954761828258512, "res": {"Yes": 0.9954761828258512, "No": 0.004523805565953009}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9982261944558917, "res": {"Yes": 0.9982261944558917, "No": 0.001773719898813343}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.8003343829143594, "res": {"Yes": 0.8003343829143594, "No": 0.1996655823867253}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995188014875804, "res": {"Yes": 0.995188014875804, "No": 0.004811950344601217}, "ground_truth": 1}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947794341679175, "res": {"Yes": 0.9947794341679175, "No": 0.005220601259022183}, "ground_truth": 0}, {"key": "31494e5d6eda2042c4793ae68cea4b8304dfa0ad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938531874266953, "res": {"Yes": 0.9938531874266953, "No": 0.006146816336882847}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9329951839921918, "res": {"Yes": 0.9329951839921918, "No": 0.0670044998437106}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9888519749471381, "res": {"Yes": 0.9888519749471381, "No": 0.01114793500024192}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979314593850547, "res": {"Yes": 0.9979314593850547, "No": 0.002068549379951317}, "ground_truth": 1}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9856204328853586, "res": {"Yes": 0.9856204328853586, "No": 0.014379517223895464}, "ground_truth": 0}, {"key": "564bdb9bbcd2276bdfbdce6eb1e2058f0921bd82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972131597689947, "res": {"Yes": 0.9972131597689947, "No": 0.0027868442960038366}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9974710621648298, "res": {"Yes": 0.9974710621648298, "No": 0.0025289451498581236}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9919114051600939, "res": {"Yes": 0.9919114051600939, "No": 0.008088575055113144}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.987754323085637, "res": {"Yes": 0.987754323085637, "No": 0.012245607227193329}, "ground_truth": 1}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9851897162757373, "res": {"Yes": 0.9851897162757373, "No": 0.014810208092364907}, "ground_truth": 0}, {"key": "acbacec078274c02aeae06061357d02d950fec33", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991292964824854, "res": {"Yes": 0.9991292964824854, "No": 0.000870707028431701}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9969781420318455, "res": {"Yes": 0.9969781420318455, "No": 0.003021829609731424}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.966414198261352, "res": {"Yes": 0.966414198261352, "No": 0.03358540773321823}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983605605514686, "res": {"Yes": 0.9983605605514686, "No": 0.0016394671475028128}, "ground_truth": 1}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9858657587113161, "res": {"Yes": 0.9858657587113161, "No": 0.014134202636560813}, "ground_truth": 0}, {"key": "04327bc3e426eee01ed61d369fd52415aa300ddc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9912896775906227, "res": {"Yes": 0.9912896775906227, "No": 0.008710195873707794}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9910796891691259, "res": {"Yes": 0.9910796891691259, "No": 0.008920208127348799}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966534721486358, "res": {"Yes": 0.9966534721486358, "No": 0.0033464774293511155}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967367177176284, "res": {"Yes": 0.9967367177176284, "No": 0.0032632977994420066}, "ground_truth": 1}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993244891202749, "res": {"Yes": 0.9993244891202749, "No": 0.0006754694009240616}, "ground_truth": 0}, {"key": "b1f3d2adb48370e5ecadff8e6e3baff407b01f0e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984861661042607, "res": {"Yes": 0.9984861661042607, "No": 0.001513832142991688}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9989595164787501, "res": {"Yes": 0.9989595164787501, "No": 0.0010405009490723068}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989210912410266, "res": {"Yes": 0.9989210912410266, "No": 0.001078907849465144}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972456470098807, "res": {"Yes": 0.9972456470098807, "No": 0.002754352200391785}, "ground_truth": 1}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997359573653939, "res": {"Yes": 0.9997359573653939, "No": 0.00026397213754234065}, "ground_truth": 0}, {"key": "d7eaf2e5ca91bf54c05ead867e5e90728d44e218", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980763120639837, "res": {"Yes": 0.9980763120639837, "No": 0.0019236928131705234}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9927881061204848, "res": {"Yes": 0.9927881061204848, "No": 0.007211895236660289}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976538684787759, "res": {"Yes": 0.9976538684787759, "No": 0.002346130274066447}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9873368490583437, "res": {"Yes": 0.9873368490583437, "No": 0.012663049683551012}, "ground_truth": 1}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9576938826672728, "res": {"Yes": 0.9576938826672728, "No": 0.04230603211099294}, "ground_truth": 0}, {"key": "1fcce9f667de2a14d76aaa6cdb3f1e6f7900e463", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981519626278165, "res": {"Yes": 0.9981519626278165, "No": 0.0018480633981368213}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9959683350713066, "res": {"Yes": 0.9959683350713066, "No": 0.004031615963085487}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995290426752328, "res": {"Yes": 0.9995290426752328, "No": 0.00047083040645964723}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976436629309623, "res": {"Yes": 0.9976436629309623, "No": 0.0023563228251973806}, "ground_truth": 1}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975762706865732, "res": {"Yes": 0.9975762706865732, "No": 0.0024236878350209407}, "ground_truth": 0}, {"key": "892f75a4d31560b60aa588faa6f4bf51322b8c18", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999487490337967, "res": {"Yes": 0.999487490337967, "No": 0.0005123955872003259}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9976433061736523, "res": {"Yes": 0.9976433061736523, "No": 0.0023567352177750875}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999744163342675, "res": {"Yes": 0.9999744163342675, "No": 2.545493306896488e-05}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994248526606768, "res": {"Yes": 0.9994248526606768, "No": 0.0005751477463494552}, "ground_truth": 1}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992446192437983, "res": {"Yes": 0.9992446192437983, "No": 0.0007552815230538768}, "ground_truth": 0}, {"key": "acf12bd8d1142648b1b522a3a925e14e48b8f068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978914594014952, "res": {"Yes": 0.9978914594014952, "No": 0.0021085389755139206}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9986340295880313, "res": {"Yes": 0.9986340295880313, "No": 0.001365911373835774}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987327137862458, "res": {"Yes": 0.9987327137862458, "No": 0.0012672386969557314}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999582999628833, "res": {"Yes": 0.999582999628833, "No": 0.00041691324640645783}, "ground_truth": 1}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998253184785939, "res": {"Yes": 0.9998253184785939, "No": 0.00017457781229185875}, "ground_truth": 0}, {"key": "c8caeb08b05cc56e1ac29146793ceb07e4c0de69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988515116181788, "res": {"Yes": 0.9988515116181788, "No": 0.001148485684995849}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9936401038992634, "res": {"Yes": 0.9936401038992634, "No": 0.006359859455920378}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995444054947095, "res": {"Yes": 0.9995444054947095, "No": 0.0004555687133781469}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978239168094716, "res": {"Yes": 0.9978239168094716, "No": 0.002176054291852135}, "ground_truth": 1}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9931103863636743, "res": {"Yes": 0.9931103863636743, "No": 0.006889596905152622}, "ground_truth": 0}, {"key": "ed7ed4440f5f7082230ae745d240350a4f20b8b7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999364370498076, "res": {"Yes": 0.999364370498076, "No": 0.0006355634688604234}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9885075169497743, "res": {"Yes": 0.9885075169497743, "No": 0.011492407259161813}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952230881088872, "res": {"Yes": 0.9952230881088872, "No": 0.004776859293503785}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.990442642641292, "res": {"Yes": 0.990442642641292, "No": 0.009557265771423053}, "ground_truth": 1}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998493900428185, "res": {"Yes": 0.9998493900428185, "No": 0.00015053002379756967}, "ground_truth": 0}, {"key": "25b7adf8bfe43c2953a874b18c7766370158638a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987151161709921, "res": {"Yes": 0.9987151161709921, "No": 0.0012848661878835912}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9588691381299468, "res": {"Yes": 0.9588691381299468, "No": 0.04113076077794408}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976682254198379, "res": {"Yes": 0.9976682254198379, "No": 0.0023317188022268444}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9949933691665211, "res": {"Yes": 0.9949933691665211, "No": 0.005006611199687303}, "ground_truth": 1}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947103198907362, "res": {"Yes": 0.9947103198907362, "No": 0.005289716638751561}, "ground_truth": 0}, {"key": "771c125e3b0c54f1c3367b2857f15d689f081163", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9869047398988232, "res": {"Yes": 0.9869047398988232, "No": 0.013095167630462062}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9981994691041054, "res": {"Yes": 0.9981994691041054, "No": 0.0018005527101845958}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987978725348129, "res": {"Yes": 0.9987978725348129, "No": 0.0012020466900354648}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991668961238966, "res": {"Yes": 0.9991668961238966, "No": 0.000833058741730346}, "ground_truth": 1}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997167744559291, "res": {"Yes": 0.9997167744559291, "No": 0.00028321818093174844}, "ground_truth": 0}, {"key": "4ad70f84c363e5b594da00983e2b40600f12ca54", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997665704089276, "res": {"Yes": 0.9997665704089276, "No": 0.00023341279956624712}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9955241461233162, "res": {"Yes": 0.9955241461233162, "No": 0.004475822255000565}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9850148133392846, "res": {"Yes": 0.9850148133392846, "No": 0.014985098574853676}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9965795854907643, "res": {"Yes": 0.9965795854907643, "No": 0.003420464220947229}, "ground_truth": 1}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9934446462955256, "res": {"Yes": 0.9934446462955256, "No": 0.006555273754095425}, "ground_truth": 0}, {"key": "cbb5654e979eb27e0af71068cd4e4e5191df35ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9910993662606086, "res": {"Yes": 0.9910993662606086, "No": 0.008900596339949813}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998659569845679, "res": {"Yes": 0.9998659569845679, "No": 0.0001340243577873351}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999217241875103, "res": {"Yes": 0.999217241875103, "No": 0.0007826496561271779}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981944825112931, "res": {"Yes": 0.9981944825112931, "No": 0.0018055090586617537}, "ground_truth": 1}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996866322899207, "res": {"Yes": 0.9996866322899207, "No": 0.00031327755599387354}, "ground_truth": 0}, {"key": "bde6b7e505c05a15a9afe44739e7f016ce169735", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991499943596374, "res": {"Yes": 0.9991499943596374, "No": 0.0008499806274286911}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9965258326335138, "res": {"Yes": 0.9965258326335138, "No": 0.003474127189509304}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981897267248299, "res": {"Yes": 0.9981897267248299, "No": 0.00181022451492839}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931494242146734, "res": {"Yes": 0.9931494242146734, "No": 0.0068504796997455215}, "ground_truth": 1}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977724135613704, "res": {"Yes": 0.9977724135613704, "No": 0.00222761137470878}, "ground_truth": 0}, {"key": "abc4744227c6bf0ddbf64cac2269bd4c6d02938b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9941192560665658, "res": {"Yes": 0.9941192560665658, "No": 0.0058807733712356635}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9997567980383928, "res": {"Yes": 0.9997567980383928, "No": 0.0002430773141502941}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934838189875993, "res": {"Yes": 0.9934838189875993, "No": 0.006516091148024577}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999716747231683, "res": {"Yes": 0.9999716747231683, "No": 2.8237942659324294e-05}, "ground_truth": 1}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997895679539296, "res": {"Yes": 0.9997895679539296, "No": 0.0002103766631163126}, "ground_truth": 0}, {"key": "f7e5e75031e9f099564ed046bd822ef1bedb3188", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998926480526327, "res": {"Yes": 0.9998926480526327, "No": 0.00010732080838094779}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9993137753598992, "res": {"Yes": 0.9993137753598992, "No": 0.0006861749044368023}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.998598718912488, "res": {"Yes": 0.998598718912488, "No": 0.0014012223726358962}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996842489398619, "res": {"Yes": 0.9996842489398619, "No": 0.0003156736914160459}, "ground_truth": 1}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996381437557689, "res": {"Yes": 0.9996381437557689, "No": 0.0003617421112057754}, "ground_truth": 0}, {"key": "9a8ee7eb582f293c617eecab5f37374d9400bb04", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999795419732683, "res": {"Yes": 0.9999795419732683, "No": 2.041311441871357e-05}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9980921064471696, "res": {"Yes": 0.9980921064471696, "No": 0.00190790079794311}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988271288516498, "res": {"Yes": 0.9988271288516498, "No": 0.0011728526913720567}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999448788113121, "res": {"Yes": 0.999448788113121, "No": 0.0005511833751022909}, "ground_truth": 1}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993897340880357, "res": {"Yes": 0.9993897340880357, "No": 0.0006102639435731031}, "ground_truth": 0}, {"key": "a745e5a95be983cde00f3d472062c0627db3113c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985108866312257, "res": {"Yes": 0.9985108866312257, "No": 0.0014890409857063831}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9992340255683044, "res": {"Yes": 0.9992340255683044, "No": 0.0007659627642843957}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993792542424146, "res": {"Yes": 0.9993792542424146, "No": 0.0006206840386221028}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991588044035539, "res": {"Yes": 0.9991588044035539, "No": 0.0008411243914090979}, "ground_truth": 1}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997160594587478, "res": {"Yes": 0.9997160594587478, "No": 0.00028384932680724736}, "ground_truth": 0}, {"key": "698cdb1268d4da1240d7f540bd4a5af4e229d00f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991732049535376, "res": {"Yes": 0.9991732049535376, "No": 0.0008267923357290505}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9779358785025984, "res": {"Yes": 0.9779358785025984, "No": 0.022064093947193813}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987304554543868, "res": {"Yes": 0.9987304554543868, "No": 0.0012695243198153512}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974587220279174, "res": {"Yes": 0.9974587220279174, "No": 0.0025412537941442723}, "ground_truth": 1}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946478099811963, "res": {"Yes": 0.9946478099811963, "No": 0.005352203145203314}, "ground_truth": 0}, {"key": "708c903fbf6a7c3c84f84ff34496eb0cb51f44e4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930963935369101, "res": {"Yes": 0.9930963935369101, "No": 0.0069036018246702285}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9991294154787917, "res": {"Yes": 0.9991294154787917, "No": 0.0008705241991534764}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974889690258174, "res": {"Yes": 0.9974889690258174, "No": 0.0025110042657072195}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985648454207093, "res": {"Yes": 0.9985648454207093, "No": 0.0014350651932015879}, "ground_truth": 1}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984231809874922, "res": {"Yes": 0.9984231809874922, "No": 0.0015768195557931712}, "ground_truth": 0}, {"key": "26a1bafbde5cc970d5aee024dc8ef418587018a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990752705260693, "res": {"Yes": 0.9990752705260693, "No": 0.000924665691844381}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9966702801535133, "res": {"Yes": 0.9966702801535133, "No": 0.0033296982323900355}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990762232846645, "res": {"Yes": 0.9990762232846645, "No": 0.0009237358535254178}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995968178997561, "res": {"Yes": 0.9995968178997561, "No": 0.0004030956819518972}, "ground_truth": 1}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997110543328424, "res": {"Yes": 0.9997110543328424, "No": 0.00028885636509276194}, "ground_truth": 0}, {"key": "6832312991fe5d87d0e300374ec7a699a921f06a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995969370916878, "res": {"Yes": 0.9995969370916878, "No": 0.0004029943564346054}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9972892772618888, "res": {"Yes": 0.9972892772618888, "No": 0.0027107227018331338}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977798881020715, "res": {"Yes": 0.9977798881020715, "No": 0.0022201378287558214}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998391437887479, "res": {"Yes": 0.9998391437887479, "No": 0.00016084361507004365}, "ground_truth": 1}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999392527721875, "res": {"Yes": 0.9999392527721875, "No": 6.0636532499595376e-05}, "ground_truth": 0}, {"key": "9c5d5d672c2549a594b801897a27dde1492e11d5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999916486517445, "res": {"Yes": 0.999916486517445, "No": 8.338551039203575e-05}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9949884131168923, "res": {"Yes": 0.9949884131168923, "No": 0.005011612296441146}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985377356518691, "res": {"Yes": 0.9985377356518691, "No": 0.0014621968242586648}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991472550937678, "res": {"Yes": 0.9991472550937678, "No": 0.0008526445526116436}, "ground_truth": 1}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986161991366148, "res": {"Yes": 0.9986161991366148, "No": 0.0013837286543560909}, "ground_truth": 0}, {"key": "c32fa9084fe8cd565890a940794e505d5f7ba4c2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991843899908075, "res": {"Yes": 0.9991843899908075, "No": 0.0008155343625530078}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9749418575933615, "res": {"Yes": 0.9749418575933615, "No": 0.02505805365321405}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9593664406926152, "res": {"Yes": 0.9593664406926152, "No": 0.04063342399302522}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987274828374829, "res": {"Yes": 0.9987274828374829, "No": 0.0012725264804146583}, "ground_truth": 1}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958476190737374, "res": {"Yes": 0.9958476190737374, "No": 0.004152415773524219}, "ground_truth": 0}, {"key": "41b5dd6064864c16fbde9b46d37c9f7d9a56e705", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9947602148170009, "res": {"Yes": 0.9947602148170009, "No": 0.005239812309963223}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9984784406469831, "res": {"Yes": 0.9984784406469831, "No": 0.0015215319345314878}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9906653568205454, "res": {"Yes": 0.9906653568205454, "No": 0.009334625692566732}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990639640552105, "res": {"Yes": 0.9990639640552105, "No": 0.0009359509375574354}, "ground_truth": 1}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978839801332468, "res": {"Yes": 0.9978839801332468, "No": 0.002115977496430217}, "ground_truth": 0}, {"key": "b2d3ba739af767da7aa25c3ba8e1d186168c3394", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992686511660225, "res": {"Yes": 0.9992686511660225, "No": 0.0007312301537188857}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9916403052153181, "res": {"Yes": 0.9916403052153181, "No": 0.008359683595750697}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999447358231209, "res": {"Yes": 0.9999447358231209, "No": 5.5169019675321885e-05}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995890728536968, "res": {"Yes": 0.9995890728536968, "No": 0.0004109205792399018}, "ground_truth": 1}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996181396672228, "res": {"Yes": 0.9996181396672228, "No": 0.0003817694368629655}, "ground_truth": 0}, {"key": "519fd9b5c5ca82979cdfee3c990c3590838d93c7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975341701653231, "res": {"Yes": 0.9975341701653231, "No": 0.00246586652948793}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9918383416732013, "res": {"Yes": 0.9918383416732013, "No": 0.008161592252977147}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912782104179586, "res": {"Yes": 0.9912782104179586, "No": 0.008721768081279368}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993581831526028, "res": {"Yes": 0.9993581831526028, "No": 0.0006417892366476252}, "ground_truth": 1}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989427410924449, "res": {"Yes": 0.9989427410924449, "No": 0.0010572760949559762}, "ground_truth": 0}, {"key": "60f518dff20b2936b104035b1fd9584dd7d16def", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985816072711717, "res": {"Yes": 0.9985816072711717, "No": 0.0014182994435495528}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9976622900099202, "res": {"Yes": 0.9976622900099202, "No": 0.0023376900389980243}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997662128924659, "res": {"Yes": 0.9997662128924659, "No": 0.0002336746020274247}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993913908777101, "res": {"Yes": 0.9993913908777101, "No": 0.0006085119544265026}, "ground_truth": 1}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975733051960011, "res": {"Yes": 0.9975733051960011, "No": 0.002426687063957467}, "ground_truth": 0}, {"key": "b82546b760716f0a2b26606b07af0fe0dd61d6ff", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999738936662965, "res": {"Yes": 0.999738936662965, "No": 0.00026094836642545557}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9953933708150325, "res": {"Yes": 0.9953933708150325, "No": 0.004606562784329378}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9731557214513401, "res": {"Yes": 0.9731557214513401, "No": 0.02684415158007081}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975364186098766, "res": {"Yes": 0.9975364186098766, "No": 0.0024635317201815747}, "ground_truth": 1}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987306936516288, "res": {"Yes": 0.9987306936516288, "No": 0.0012692095168212218}, "ground_truth": 0}, {"key": "7990fde4a924299fab79f6c8e2b447ee5ffcd1fc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9931643609963908, "res": {"Yes": 0.9931643609963908, "No": 0.006835545514958703}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9961641891892392, "res": {"Yes": 0.9961641891892392, "No": 0.0038357484508903237}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9894811402498863, "res": {"Yes": 0.9894811402498863, "No": 0.010518706873478043}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963776451141498, "res": {"Yes": 0.9963776451141498, "No": 0.00362237460487515}, "ground_truth": 1}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963009616604347, "res": {"Yes": 0.9963009616604347, "No": 0.0036990050533250536}, "ground_truth": 0}, {"key": "504f6f6e1aea6bb73c728d12a1342faa6828aace", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9870553461442358, "res": {"Yes": 0.9870553461442358, "No": 0.012944577556897536}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9914864096247387, "res": {"Yes": 0.9914864096247387, "No": 0.008513555679353866}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9712519932986923, "res": {"Yes": 0.9712519932986923, "No": 0.02874751357001491}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9876418736572589, "res": {"Yes": 0.9876418736572589, "No": 0.012358057493792849}, "ground_truth": 1}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986090678440765, "res": {"Yes": 0.9986090678440765, "No": 0.0013908525145821657}, "ground_truth": 0}, {"key": "ef42653bf9337de184e74be0160b807a41b526fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8457528851299267, "res": {"Yes": 0.8457528851299267, "No": 0.15424648563836713}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9972290446036532, "res": {"Yes": 0.9972290446036532, "No": 0.002770903607180304}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967804219925787, "res": {"Yes": 0.9967804219925787, "No": 0.0032195444647765935}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974690443826592, "res": {"Yes": 0.9974690443826592, "No": 0.0025309083729264472}, "ground_truth": 1}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999568589424315, "res": {"Yes": 0.999568589424315, "No": 0.00043132771377446397}, "ground_truth": 0}, {"key": "26d0a5624f3ee7605833c641da5d88e2259b1162", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997842085863171, "res": {"Yes": 0.9997842085863171, "No": 0.00021575350121956784}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9858905535469414, "res": {"Yes": 0.9858905535469414, "No": 0.014109400522821771}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9841969436211109, "res": {"Yes": 0.9841969436211109, "No": 0.015803001320422415}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9829091964579729, "res": {"Yes": 0.9829091964579729, "No": 0.01709081661140521}, "ground_truth": 1}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9746917283790048, "res": {"Yes": 0.9746917283790048, "No": 0.025308185450047767}, "ground_truth": 0}, {"key": "7e43e9a2c67829721ff9f68147285c579e2c8a70", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9827603028906905, "res": {"Yes": 0.9827603028906905, "No": 0.017239672680098513}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9948153051683476, "res": {"Yes": 0.9948153051683476, "No": 0.005184670437877891}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9941858191520355, "res": {"Yes": 0.9941858191520355, "No": 0.0058141789393330425}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999776581231691, "res": {"Yes": 0.999776581231691, "No": 0.00022328937097606288}, "ground_truth": 1}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996482724706356, "res": {"Yes": 0.9996482724706356, "No": 0.00035169824800530526}, "ground_truth": 0}, {"key": "2137ee3e39b5c0d12e17bbd1e10eb75495a3a83b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991334576648446, "res": {"Yes": 0.9991334576648446, "No": 0.0008665224849343907}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9465348302628007, "res": {"Yes": 0.9465348302628007, "No": 0.053465055858845284}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981632477977009, "res": {"Yes": 0.9981632477977009, "No": 0.0018367271757016093}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962293830962521, "res": {"Yes": 0.9962293830962521, "No": 0.0037706683955027586}, "ground_truth": 1}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989125251298298, "res": {"Yes": 0.9989125251298298, "No": 0.0010873725470332684}, "ground_truth": 0}, {"key": "31defcc44013caae59c0bdbcedd16be150413bd5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9454213290535827, "res": {"Yes": 0.9454213290535827, "No": 0.054578546210214005}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.993840706299198, "res": {"Yes": 0.993840706299198, "No": 0.006159319366455334}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994952309887639, "res": {"Yes": 0.9994952309887639, "No": 0.00050474994991433}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9965746178528856, "res": {"Yes": 0.9965746178528856, "No": 0.0034253298682768232}, "ground_truth": 1}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981413986426074, "res": {"Yes": 0.9981413986426074, "No": 0.0018585358103448187}, "ground_truth": 0}, {"key": "db73dc3466e4e92672d716c1eb2ac9430ac69951", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983041210305958, "res": {"Yes": 0.9983041210305958, "No": 0.0016958910212761106}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9985049463079291, "res": {"Yes": 0.9985049463079291, "No": 0.0014949851579222082}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936611786331735, "res": {"Yes": 0.9936611786331735, "No": 0.006338752477532901}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992480698912377, "res": {"Yes": 0.9992480698912377, "No": 0.0007518949033234959}, "ground_truth": 1}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996761493709394, "res": {"Yes": 0.9996761493709394, "No": 0.0003237418885293498}, "ground_truth": 0}, {"key": "3f6308f2f5b6ccdb89d7b6abe1b9490cc30b8e5f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992822268724628, "res": {"Yes": 0.9992822268724628, "No": 0.0007177368932604369}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9887536009354525, "res": {"Yes": 0.9887536009354525, "No": 0.011246256064350145}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9951049117869338, "res": {"Yes": 0.9951049117869338, "No": 0.004895025958252179}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997547808246465, "res": {"Yes": 0.997547808246465, "No": 0.002452130599224723}, "ground_truth": 1}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994316397279254, "res": {"Yes": 0.9994316397279254, "No": 0.0005682602827084026}, "ground_truth": 0}, {"key": "068d37f7a1ee822245dbf9d0d8ddfc281919d7ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970094255033912, "res": {"Yes": 0.9970094255033912, "No": 0.002990514897152035}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9859238052383776, "res": {"Yes": 0.9859238052383776, "No": 0.014076165528016178}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989153793266654, "res": {"Yes": 0.9989153793266654, "No": 0.0010845544733057938}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990482586933288, "res": {"Yes": 0.9990482586933288, "No": 0.0009516764922401189}, "ground_truth": 1}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986844383109771, "res": {"Yes": 0.9986844383109771, "No": 0.0013155663784947375}, "ground_truth": 0}, {"key": "bb1e2c23f3fc6edb0ee4bed365cb5567c87a5aa9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997276153234438, "res": {"Yes": 0.9997276153234438, "No": 0.0002722847947254339}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.969267640928378, "res": {"Yes": 0.969267640928378, "No": 0.030732105164369375}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977754983791101, "res": {"Yes": 0.9977754983791101, "No": 0.0022244768824892084}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9722016890733415, "res": {"Yes": 0.9722016890733415, "No": 0.02779798341348238}, "ground_truth": 1}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996681653493006, "res": {"Yes": 0.9996681653493006, "No": 0.0003317390141983614}, "ground_truth": 0}, {"key": "7c72b57e65ddefc77362ed64f7dbc0aebb87ac81", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9812917863273711, "res": {"Yes": 0.9812917863273711, "No": 0.018708066209946007}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9756073381399442, "res": {"Yes": 0.9756073381399442, "No": 0.02439256271276317}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986416410058843, "res": {"Yes": 0.9986416410058843, "No": 0.0013583249401597605}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999437822452772, "res": {"Yes": 0.9999437822452772, "No": 5.615266026920399e-05}, "ground_truth": 1}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993067472108316, "res": {"Yes": 0.9993067472108316, "No": 0.0006931341492130987}, "ground_truth": 0}, {"key": "37a335b36b8d2472825c4624e5449b995726b7a3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997652594858669, "res": {"Yes": 0.9997652594858669, "No": 0.00023471430659970837}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9985379738031475, "res": {"Yes": 0.9985379738031475, "No": 0.0014619911076379963}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999520068687072, "res": {"Yes": 0.9999520068687072, "No": 4.7900768971093004e-05}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997161786049148, "res": {"Yes": 0.9997161786049148, "No": 0.00028370318203944944}, "ground_truth": 1}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998759688125038, "res": {"Yes": 0.9998759688125038, "No": 0.0001239271084850294}, "ground_truth": 0}, {"key": "da894eb6e5cc104f90ac65e8a124bfcfbfc6578d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999028985246534, "res": {"Yes": 0.9999028985246534, "No": 9.703983438473693e-05}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9969482849775297, "res": {"Yes": 0.9969482849775297, "No": 0.0030517075273035973}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985239612183744, "res": {"Yes": 0.9985239612183744, "No": 0.0014760244321407013}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983381048795986, "res": {"Yes": 0.9983381048795986, "No": 0.0016618464092935797}, "ground_truth": 1}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996800781459136, "res": {"Yes": 0.9996800781459136, "No": 0.0003198602104364589}, "ground_truth": 0}, {"key": "b7697e424512e656efe5f2aca17bc33cfe0a2b20", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989107390758315, "res": {"Yes": 0.9989107390758315, "No": 0.001089180718242429}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9172706803193281, "res": {"Yes": 0.9172706803193281, "No": 0.08272917152773145}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.996138286268606, "res": {"Yes": 0.996138286268606, "No": 0.0038616798340684612}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997408397678161, "res": {"Yes": 0.9997408397678161, "No": 0.00025903561398090536}, "ground_truth": 1}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976487656918193, "res": {"Yes": 0.9976487656918193, "No": 0.0023511986900517775}, "ground_truth": 0}, {"key": "2292d4348e59aafbe93f3040af23fdff448a6cac", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9910010149352995, "res": {"Yes": 0.9910010149352995, "No": 0.008998956975256855}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9891477947004822, "res": {"Yes": 0.9891477947004822, "No": 0.01085213982008125}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962213334957346, "res": {"Yes": 0.9962213334957346, "No": 0.003778703188646005}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9958758666920724, "res": {"Yes": 0.9958758666920724, "No": 0.004124101916959617}, "ground_truth": 1}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.992097822666592, "res": {"Yes": 0.992097822666592, "No": 0.007902113686835998}, "ground_truth": 0}, {"key": "8ed7e0c5e15275537f2c1d5d945e95709d7dc6bf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993510392661646, "res": {"Yes": 0.9993510392661646, "No": 0.0006488843852770519}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9895356512778867, "res": {"Yes": 0.9895356512778867, "No": 0.010464214349568355}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9948425638790833, "res": {"Yes": 0.9948425638790833, "No": 0.00515742940987789}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994184230214889, "res": {"Yes": 0.9994184230214889, "No": 0.0005815044112789621}, "ground_truth": 1}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948680516741251, "res": {"Yes": 0.9948680516741251, "No": 0.005131990927805948}, "ground_truth": 0}, {"key": "6b22be31a2cefb2fdd302b8ff788335f23f5dbad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987403221610398, "res": {"Yes": 0.9987403221610398, "No": 0.0012596958198260347}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.989572770445886, "res": {"Yes": 0.989572770445886, "No": 0.010427111394057418}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997814675017057, "res": {"Yes": 0.9997814675017057, "No": 0.00021841688605078215}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998734658461215, "res": {"Yes": 0.9998734658461215, "No": 0.00012648472602146902}, "ground_truth": 1}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960152831372037, "res": {"Yes": 0.9960152831372037, "No": 0.003984667133861625}, "ground_truth": 0}, {"key": "686caba1039a286aca406e0bbac00aebb198fda5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981083770780704, "res": {"Yes": 0.9981083770780704, "No": 0.0018916042507979927}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9960628264717741, "res": {"Yes": 0.9960628264717741, "No": 0.003937125462458899}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9948284019980498, "res": {"Yes": 0.9948284019980498, "No": 0.005171541357408061}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971774827131124, "res": {"Yes": 0.9971774827131124, "No": 0.002822520496901364}, "ground_truth": 1}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979523558894231, "res": {"Yes": 0.9979523558894231, "No": 0.002047568064611662}, "ground_truth": 0}, {"key": "3c84e13d5941fdbe626183cdf1dca9b5df6c8f92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998802522853301, "res": {"Yes": 0.9998802522853301, "No": 0.00011969987376448799}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9939429195245962, "res": {"Yes": 0.9939429195245962, "No": 0.0060570087886123855}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989194242429005, "res": {"Yes": 0.9989194242429005, "No": 0.0010805453625219274}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993006756913068, "res": {"Yes": 0.9993006756913068, "No": 0.0006992592482592901}, "ground_truth": 1}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960785600060159, "res": {"Yes": 0.9960785600060159, "No": 0.003921449198518315}, "ground_truth": 0}, {"key": "7f5fd7614f32586747f65545bebba418c3679d12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990112662196768, "res": {"Yes": 0.9990112662196768, "No": 0.000988648202435669}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9032288090591323, "res": {"Yes": 0.9032288090591323, "No": 0.09677110803148373}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977351481560873, "res": {"Yes": 0.9977351481560873, "No": 0.002264826779530802}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992441428040775, "res": {"Yes": 0.9992441428040775, "No": 0.000755834591527118}, "ground_truth": 1}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990513550084331, "res": {"Yes": 0.9990513550084331, "No": 0.000948562668127036}, "ground_truth": 0}, {"key": "186210bbc060c272ff9a2412ec2e711a3e4a5e6e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958498637168005, "res": {"Yes": 0.9958498637168005, "No": 0.004150127177617703}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9233452891636544, "res": {"Yes": 0.9233452891636544, "No": 0.07665463544652945}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9941418781040972, "res": {"Yes": 0.9941418781040972, "No": 0.005858114997046419}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985203977922724, "res": {"Yes": 0.9985203977922724, "No": 0.0014796161247775054}, "ground_truth": 1}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997482209764218, "res": {"Yes": 0.997482209764218, "No": 0.002517778534920238}, "ground_truth": 0}, {"key": "03392835ea832d7940082c421a21778c70701ff0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999347227208922, "res": {"Yes": 0.999347227208922, "No": 0.0006527596936608096}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9897852753145414, "res": {"Yes": 0.9897852753145414, "No": 0.010214642999267212}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9928982137175031, "res": {"Yes": 0.9928982137175031, "No": 0.007101764533267268}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9941937080478096, "res": {"Yes": 0.9941937080478096, "No": 0.005806313610152027}, "ground_truth": 1}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986494866650274, "res": {"Yes": 0.9986494866650274, "No": 0.0013505086226075571}, "ground_truth": 0}, {"key": "e2b96982b60cbd10b39c1db4282023d0ac1f3f2a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.995686974923363, "res": {"Yes": 0.995686974923363, "No": 0.004313015359985116}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9795622899503298, "res": {"Yes": 0.9795622899503298, "No": 0.020437707039559026}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.998487832379365, "res": {"Yes": 0.998487832379365, "No": 0.0015121719260258054}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975705738440307, "res": {"Yes": 0.9975705738440307, "No": 0.002429412063696925}, "ground_truth": 1}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968641680239999, "res": {"Yes": 0.9968641680239999, "No": 0.00313582337445924}, "ground_truth": 0}, {"key": "31310358d02c98abba71916e8cb4ef7477a2d82a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9964455814543289, "res": {"Yes": 0.9964455814543289, "No": 0.003554383672341551}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9240980982856091, "res": {"Yes": 0.9240980982856091, "No": 0.07590176807925722}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994092534095175, "res": {"Yes": 0.9994092534095175, "No": 0.0005907149352415036}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981364052531226, "res": {"Yes": 0.9981364052531226, "No": 0.001863602195362408}, "ground_truth": 1}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980550488735016, "res": {"Yes": 0.9980550488735016, "No": 0.0019449091743390188}, "ground_truth": 0}, {"key": "ec0e9a0c77bbb4bba73ef5177413c666e691850d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942033786170406, "res": {"Yes": 0.9942033786170406, "No": 0.005796630955159911}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9829416742802054, "res": {"Yes": 0.9829416742802054, "No": 0.017058357830872787}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.994638020009377, "res": {"Yes": 0.994638020009377, "No": 0.005361920853004937}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9860905784944124, "res": {"Yes": 0.9860905784944124, "No": 0.013909327788323888}, "ground_truth": 1}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9911893046798361, "res": {"Yes": 0.9911893046798361, "No": 0.008810601206402606}, "ground_truth": 0}, {"key": "02311be4aab97a81d2262963896b975c27f6503d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999584191172462, "res": {"Yes": 0.999584191172462, "No": 0.00041571168620031107}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9979090279342346, "res": {"Yes": 0.9979090279342346, "No": 0.0020909626938308244}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979863128937887, "res": {"Yes": 0.9979863128937887, "No": 0.002013610297865016}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999648749133035, "res": {"Yes": 0.999648749133035, "No": 0.0003512292197220713}, "ground_truth": 1}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999322344911992, "res": {"Yes": 0.999322344911992, "No": 0.0006775867290808267}, "ground_truth": 0}, {"key": "a18b463219c663ef55a0cbbf81ae489cdf877b62", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998605935679798, "res": {"Yes": 0.9998605935679798, "No": 0.00013935094814568634}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8223587858656354, "res": {"Yes": 0.8223587858656354, "No": 0.17764077247956456}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.7165012980941927, "res": {"Yes": 0.7165012980941927, "No": 0.28349839987787834}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9907233867433558, "res": {"Yes": 0.9907233867433558, "No": 0.009276494049102498}, "ground_truth": 1}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9774932681005383, "res": {"Yes": 0.9774932681005383, "No": 0.02250650726067085}, "ground_truth": 0}, {"key": "aa966ac9e3611ac2cef8d3fb9ceb3b58c150e238", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9950523789980512, "res": {"Yes": 0.9950523789980512, "No": 0.004947573621759933}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.1793603084381756, "res": {"No": 0.8206393642842311, "Yes": 0.1793603084381756}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985892144948317, "res": {"Yes": 0.9985892144948317, "No": 0.001410701754747993}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9924270464089894, "res": {"Yes": 0.9924270464089894, "No": 0.007572910554692693}, "ground_truth": 1}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973779660517363, "res": {"Yes": 0.9973779660517363, "No": 0.002622009422542661}, "ground_truth": 0}, {"key": "5215d4e6e945ba390786f1720cc5a8abf92c32fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987750461691932, "res": {"Yes": 0.9987750461691932, "No": 0.0012248861231505361}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9773569446663217, "res": {"Yes": 0.9773569446663217, "No": 0.022642953029222777}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9890297677254488, "res": {"Yes": 0.9890297677254488, "No": 0.010970127304242794}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999446166237158, "res": {"Yes": 0.9999446166237158, "No": 5.529140978107497e-05}, "ground_truth": 1}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995043977513863, "res": {"Yes": 0.9995043977513863, "No": 0.0004954923714369659}, "ground_truth": 0}, {"key": "e7297f59909379c8eb07ea9bc2377b5e3980073d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994153255587529, "res": {"Yes": 0.9994153255587529, "No": 0.0005845650450559264}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9960892023654192, "res": {"Yes": 0.9960892023654192, "No": 0.0039107789690755}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999436520644154, "res": {"Yes": 0.999436520644154, "No": 0.0005634623732532702}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962425066119771, "res": {"Yes": 0.9962425066119771, "No": 0.003757422732831271}, "ground_truth": 1}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993938926573103, "res": {"Yes": 0.9993938926573103, "No": 0.0006060906473473243}, "ground_truth": 0}, {"key": "6c5f775006a225e617c1fa4cf0c439579e1443a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989844979715279, "res": {"Yes": 0.9989844979715279, "No": 0.0010154930712101017}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9859395638774575, "res": {"Yes": 0.9859395638774575, "No": 0.014060399205617847}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997336930659882, "res": {"Yes": 0.9997336930659882, "No": 0.0002662755188733022}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998280364969578, "res": {"Yes": 0.998280364969578, "No": 0.0017196284997553243}, "ground_truth": 1}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998633348695298, "res": {"Yes": 0.9998633348695298, "No": 0.00013658819974876225}, "ground_truth": 0}, {"key": "46ce664a42eac4474d25c435a28790ec25137603", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987400841612494, "res": {"Yes": 0.9987400841612494, "No": 0.001259829354660158}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9599745685163052, "res": {"Yes": 0.9599745685163052, "No": 0.04002530499563888}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949528886847764, "res": {"Yes": 0.9949528886847764, "No": 0.005047078590598276}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982417570233965, "res": {"Yes": 0.9982417570233965, "No": 0.0017582056623751975}, "ground_truth": 1}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939855556205605, "res": {"Yes": 0.9939855556205605, "No": 0.006014396302073524}, "ground_truth": 0}, {"key": "693d73ab049f7d8f3c897922b3f7269a96a1a4fa", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988002501759784, "res": {"Yes": 0.9988002501759784, "No": 0.0011996766701928801}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9996087262075636, "res": {"Yes": 0.9996087262075636, "No": 0.0003911904625834355}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995796633062313, "res": {"Yes": 0.9995796633062313, "No": 0.00042028916310484133}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999107652316588, "res": {"Yes": 0.9999107652316588, "No": 8.9205487608138e-05}, "ground_truth": 1}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999560596094073, "res": {"Yes": 0.9999560596094073, "No": 4.391331416164727e-05}, "ground_truth": 0}, {"key": "48e4930e3c380330b609f460f11520be0e29d5fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981485193089302, "res": {"Yes": 0.9981485193089302, "No": 0.0018515151038776447}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9999528412540086, "res": {"Yes": 0.9999528412540086, "No": 4.7045704151568015e-05}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984912803635011, "res": {"Yes": 0.9984912803635011, "No": 0.0015086516109106135}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999952483661937, "res": {"Yes": 0.999952483661937, "No": 4.746514052653529e-05}, "ground_truth": 1}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999504572972803, "res": {"Yes": 0.9999504572972803, "No": 4.941369230547009e-05}, "ground_truth": 0}, {"key": "53f3e11ada213ff55b987692a2eb0c9b6d890d5e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999798995780161, "res": {"Yes": 0.9999798995780161, "No": 2.0062322589582317e-05}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9989093138310058, "res": {"Yes": 0.9989093138310058, "No": 0.0010906093524502055}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997123652048218, "res": {"Yes": 0.9997123652048218, "No": 0.00028751658857503173}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999722375444831, "res": {"Yes": 0.999722375444831, "No": 0.0002775509605545024}, "ground_truth": 1}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993652044180229, "res": {"Yes": 0.9993652044180229, "No": 0.0006347616397583498}, "ground_truth": 0}, {"key": "d2ee859acf7e35bea229f968d53240c2f1c2845c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992430708155361, "res": {"Yes": 0.9992430708155361, "No": 0.0007568352224631192}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9884350728770739, "res": {"Yes": 0.9884350728770739, "No": 0.011564826333153816}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9914656689374737, "res": {"Yes": 0.9914656689374737, "No": 0.008534219277259218}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986730244117565, "res": {"Yes": 0.9986730244117565, "No": 0.0013269821169802516}, "ground_truth": 1}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990018658675924, "res": {"Yes": 0.9990018658675924, "No": 0.000998146997914287}, "ground_truth": 0}, {"key": "d4d44650eb77504fd01a730c0ccc7e6cdfcc578a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9866140948483343, "res": {"Yes": 0.9866140948483343, "No": 0.013385868741592687}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9970333671775847, "res": {"Yes": 0.9970333671775847, "No": 0.0029665889140583035}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993405598862675, "res": {"Yes": 0.9993405598862675, "No": 0.0006593876214153786}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978848126681987, "res": {"Yes": 0.9978848126681987, "No": 0.002115219900544385}, "ground_truth": 1}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998187634553272, "res": {"Yes": 0.9998187634553272, "No": 0.0001811346283761129}, "ground_truth": 0}, {"key": "3695132378151f89d210da575bfef0a3f29aa051", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9934733516972243, "res": {"Yes": 0.9934733516972243, "No": 0.0065266701293767646}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9562447959499912, "res": {"Yes": 0.9562447959499912, "No": 0.04375484191322304}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9885842933560054, "res": {"Yes": 0.9885842933560054, "No": 0.011415654086828401}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960784414726743, "res": {"Yes": 0.9960784414726743, "No": 0.003921613902843418}, "ground_truth": 1}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999180360292298, "res": {"Yes": 0.9999180360292298, "No": 8.190987504121061e-05}, "ground_truth": 0}, {"key": "c193cecd478fa204b16500ece25bf25b75bc5265", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994930864241223, "res": {"Yes": 0.9994930864241223, "No": 0.0005068211895063211}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9977544903373782, "res": {"Yes": 0.9977544903373782, "No": 0.0022455401171959737}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977048862104765, "res": {"Yes": 0.9977048862104765, "No": 0.0022951189654860865}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9834546972499159, "res": {"Yes": 0.9834546972499159, "No": 0.0165452537626797}, "ground_truth": 1}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997416739519209, "res": {"Yes": 0.9997416739519209, "No": 0.0002582772338892202}, "ground_truth": 0}, {"key": "b599d61b0c28b86dc92c7af4d40814ece8279500", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994873712390449, "res": {"Yes": 0.9994873712390449, "No": 0.0005125245221494022}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9974357176034537, "res": {"Yes": 0.9974357176034537, "No": 0.0025642242057665266}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9901719975942324, "res": {"Yes": 0.9901719975942324, "No": 0.009827876407025588}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.992638435038777, "res": {"Yes": 0.992638435038777, "No": 0.007361476904499062}, "ground_truth": 1}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974552848908292, "res": {"Yes": 0.9974552848908292, "No": 0.002544737697771322}, "ground_truth": 0}, {"key": "06f290a163e81c46437ca61b5fd13741d7db11f2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9936368133661438, "res": {"Yes": 0.9936368133661438, "No": 0.00636314008127834}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9942802180972674, "res": {"Yes": 0.9942802180972674, "No": 0.005719713856094696}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999812212134349, "res": {"Yes": 0.999812212134349, "No": 0.00018769195054019322}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982771628906147, "res": {"Yes": 0.9982771628906147, "No": 0.0017227679644602788}, "ground_truth": 1}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997311904957507, "res": {"Yes": 0.9997311904957507, "No": 0.0002687771393481178}, "ground_truth": 0}, {"key": "46210bd0b15cd5bba64751d873674766decb2677", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989263231039429, "res": {"Yes": 0.9989263231039429, "No": 0.001073580451797395}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9990505213403873, "res": {"Yes": 0.9990505213403873, "No": 0.0009494705922678956}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.997445081374436, "res": {"Yes": 0.997445081374436, "No": 0.0025548851689396706}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985737581510123, "res": {"Yes": 0.9985737581510123, "No": 0.0014261988795350353}, "ground_truth": 1}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998934823934031, "res": {"Yes": 0.9998934823934031, "No": 0.00010639318860246758}, "ground_truth": 0}, {"key": "cfba701dad16c47e0f7b97b8a6b043d25f740085", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999508329509569, "res": {"Yes": 0.999508329509569, "No": 0.0004915559940716377}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9968764863506329, "res": {"Yes": 0.9968764863506329, "No": 0.003123529395492553}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998259143946578, "res": {"Yes": 0.9998259143946578, "No": 0.00017400143702838968}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998757304221038, "res": {"Yes": 0.9998757304221038, "No": 0.0001241399499689542}, "ground_truth": 1}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999894357248024, "res": {"Yes": 0.9999894357248024, "No": 1.053617483081195e-05}, "ground_truth": 0}, {"key": "1c0fbe7e6758334d83d145580688e7aa1c549c53", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999752042066651, "res": {"Yes": 0.999752042066651, "No": 0.00024787226623251444}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.997996644400973, "res": {"Yes": 0.997996644400973, "No": 0.002003286894808184}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924271649047859, "res": {"Yes": 0.9924271649047859, "No": 0.007572765156205875}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978767387150546, "res": {"Yes": 0.9978767387150546, "No": 0.002123295321900752}, "ground_truth": 1}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989703418621014, "res": {"Yes": 0.9989703418621014, "No": 0.0010296616766194034}, "ground_truth": 0}, {"key": "820c917dada8b32be365eaeec9edc1ca5833cfcd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992977050546176, "res": {"Yes": 0.9992977050546176, "No": 0.0007022296996363224}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9971107417637025, "res": {"Yes": 0.9971107417637025, "No": 0.002889240252001073}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995380979096314, "res": {"Yes": 0.9995380979096314, "No": 0.00046179831953312107}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9835141980589809, "res": {"Yes": 0.9835141980589809, "No": 0.016485773205159905}, "ground_truth": 1}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998816825540612, "res": {"Yes": 0.9998816825540612, "No": 0.00011819497105067121}, "ground_truth": 0}, {"key": "faf3821150b0c63290cbbddd7a8b54900995ea5a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992948462378533, "res": {"Yes": 0.9992948462378533, "No": 0.0007050621521216386}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9989557096510783, "res": {"Yes": 0.9989557096510783, "No": 0.0010442984068993956}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997401283924773, "res": {"Yes": 0.9997401283924773, "No": 0.00025978946629202855}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997516845353897, "res": {"Yes": 0.9997516845353897, "No": 0.000248235665227314}, "ground_truth": 1}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994266396938766, "res": {"Yes": 0.9994266396938766, "No": 0.000573319732207599}, "ground_truth": 0}, {"key": "1de0cae2832cf41f49e6fad8254247c789dfabd2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999722971309536, "res": {"Yes": 0.999722971309536, "No": 0.00027700416937358934}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9981485193089302, "res": {"Yes": 0.9981485193089302, "No": 0.0018514658542309048}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983250289324018, "res": {"Yes": 0.9983250289324018, "No": 0.0016749666907872535}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964082943611752, "res": {"Yes": 0.9964082943611752, "No": 0.003591745438744195}, "ground_truth": 1}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.993108151768508, "res": {"Yes": 0.993108151768508, "No": 0.006891844658251681}, "ground_truth": 0}, {"key": "6c6e51ef20def117ed9114ed8d8480b3a100f1c3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9950842560115998, "res": {"Yes": 0.9950842560115998, "No": 0.004915715309304046}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9879957241767104, "res": {"Yes": 0.9879957241767104, "No": 0.0120041362630272}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944278947885594, "res": {"Yes": 0.9944278947885594, "No": 0.005572126057141722}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957823603300406, "res": {"Yes": 0.9957823603300406, "No": 0.004217689975426008}, "ground_truth": 1}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992568771126811, "res": {"Yes": 0.9992568771126811, "No": 0.0007430397614060318}, "ground_truth": 0}, {"key": "b8c755b8921f7917d161605d2ef404d9551f3846", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9947846298149924, "res": {"Yes": 0.9947846298149924, "No": 0.0052153936979708725}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9993928204681416, "res": {"Yes": 0.9993928204681416, "No": 0.0006071750032338072}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997830168142573, "res": {"Yes": 0.9997830168142573, "No": 0.0002168894025534075}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968408247290934, "res": {"Yes": 0.9968408247290934, "No": 0.003159150868477852}, "ground_truth": 1}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998829748179347, "res": {"Yes": 0.998829748179347, "No": 0.0011702231755842752}, "ground_truth": 0}, {"key": "dc7e18d14b8ad29abbd79f554324ec7b8ba8b9c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999768003491161, "res": {"Yes": 0.9999768003491161, "No": 2.3166529846165276e-05}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9952540253979929, "res": {"Yes": 0.9952540253979929, "No": 0.004745929384642155}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991300110899877, "res": {"Yes": 0.9991300110899877, "No": 0.0008699807265081008}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9879549968684294, "res": {"Yes": 0.9879549968684294, "No": 0.012044875249560846}, "ground_truth": 1}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9786612868684226, "res": {"Yes": 0.9786612868684226, "No": 0.02133867339977176}, "ground_truth": 0}, {"key": "a7fc90db6c296663b1b7fc057451dfd45019342d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.876807560493453, "res": {"Yes": 0.876807560493453, "No": 0.12319245150640927}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9992823460368755, "res": {"Yes": 0.9992823460368755, "No": 0.0007176328289557587}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995264250920127, "res": {"Yes": 0.9995264250920127, "No": 0.0004735419754280455}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998563301740508, "res": {"Yes": 0.998563301740508, "No": 0.0014367227890408566}, "ground_truth": 1}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997524441561262, "res": {"Yes": 0.997524441561262, "No": 0.00247551332605537}, "ground_truth": 0}, {"key": "7f22fbc4fd81e50c632c5853856965aa4d0065d4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9826016854037266, "res": {"Yes": 0.9826016854037266, "No": 0.01739827875119338}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9981881799312278, "res": {"Yes": 0.9981881799312278, "No": 0.0018118044438600095}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912694237668439, "res": {"Yes": 0.9912694237668439, "No": 0.008730509926602403}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9924801198492418, "res": {"Yes": 0.9924801198492418, "No": 0.007519798023185783}, "ground_truth": 1}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995759695563304, "res": {"Yes": 0.9995759695563304, "No": 0.00042396078297226393}, "ground_truth": 0}, {"key": "309ae632df7b88a9569d81fa91aff3b9fb03529e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998275829655257, "res": {"Yes": 0.9998275829655257, "No": 0.00017229300724825962}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9955372597515005, "res": {"Yes": 0.9955372597515005, "No": 0.004462719256384537}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995097592572815, "res": {"Yes": 0.9995097592572815, "No": 0.0004902289652395269}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997677621813781, "res": {"Yes": 0.9997677621813781, "No": 0.00023219361196412418}, "ground_truth": 1}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999722375444831, "res": {"Yes": 0.999722375444831, "No": 0.0002776114732595486}, "ground_truth": 0}, {"key": "732304a0901ff14d023d4361c1d41a85724808f3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998580906400859, "res": {"Yes": 0.9998580906400859, "No": 0.00014187417283265753}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9848201951184585, "res": {"Yes": 0.9848201951184585, "No": 0.015179749638547043}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981697809971709, "res": {"Yes": 0.9981697809971709, "No": 0.0018302275053362618}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9887019755027301, "res": {"Yes": 0.9887019755027301, "No": 0.011297909165338448}, "ground_truth": 1}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985957541771524, "res": {"Yes": 0.9985957541771524, "No": 0.0014042527053628757}, "ground_truth": 0}, {"key": "509b9f69b376e4acb8da49d53198699b1e779ea7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984482533058455, "res": {"Yes": 0.9984482533058455, "No": 0.001551742788523224}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9996087262075636, "res": {"Yes": 0.9996087262075636, "No": 0.0003912487543049857}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959612408141221, "res": {"Yes": 0.9959612408141221, "No": 0.004038713816296345}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995691852073547, "res": {"Yes": 0.9995691852073547, "No": 0.0004307737705419518}, "ground_truth": 1}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999050439846079, "res": {"Yes": 0.9999050439846079, "No": 9.48966645464794e-05}, "ground_truth": 0}, {"key": "c292860b41f7c9131458cdc44a79febef38ad8a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992104562538225, "res": {"Yes": 0.9992104562538225, "No": 0.0007895444692623795}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9948233323654306, "res": {"Yes": 0.9948233323654306, "No": 0.005176707720330881}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992872261840283, "res": {"Yes": 0.9992872261840283, "No": 0.0007127693372920422}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988002501759784, "res": {"Yes": 0.9988002501759784, "No": 0.0011997224987170003}, "ground_truth": 1}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996074155014689, "res": {"Yes": 0.9996074155014689, "No": 0.00039246507719488654}, "ground_truth": 0}, {"key": "6db7e70883f0241081833487c421bab86fdac26e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988979010576198, "res": {"Yes": 0.9988979010576198, "No": 0.0011020094422535102}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9934635843126343, "res": {"Yes": 0.9934635843126343, "No": 0.00653639623364011}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987425843104314, "res": {"Yes": 0.9987425843104314, "No": 0.001257398342767147}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976256233022921, "res": {"Yes": 0.9976256233022921, "No": 0.0023744073725025824}, "ground_truth": 1}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995981286419349, "res": {"Yes": 0.9995981286419349, "No": 0.0004018219813046708}, "ground_truth": 0}, {"key": "e51e06234b3f49da16fec69aaee6fb9cd09cb1d8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998609511181921, "res": {"Yes": 0.9998609511181921, "No": 0.00013893949602378797}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.997900243679097, "res": {"Yes": 0.997900243679097, "No": 0.002099737378353623}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977327730503941, "res": {"Yes": 0.9977327730503941, "No": 0.0022671462431212755}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986771837936975, "res": {"Yes": 0.9986771837936975, "No": 0.0013227577727399752}, "ground_truth": 1}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984419563122585, "res": {"Yes": 0.9984419563122585, "No": 0.0015579907011570688}, "ground_truth": 0}, {"key": "42474528588bac3a27842671a3fa940fed450552", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973273287623389, "res": {"Yes": 0.9973273287623389, "No": 0.002672629125130729}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9988597199139724, "res": {"Yes": 0.9988597199139724, "No": 0.001140283572159422}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999216118525114, "res": {"Yes": 0.9999216118525114, "No": 7.827110910530501e-05}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991978417011235, "res": {"Yes": 0.9991978417011235, "No": 0.0008021205898435238}, "ground_truth": 1}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998386670555593, "res": {"Yes": 0.9998386670555593, "No": 0.00016120608185087642}, "ground_truth": 0}, {"key": "5932d9c8f1da55bad396197ebae0853c515180a5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948796114116358, "res": {"Yes": 0.9948796114116358, "No": 0.005120434398872213}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9989483304924013, "res": {"Yes": 0.9989483304924013, "No": 0.001051569993136272}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999987886094374, "res": {"Yes": 0.999987886094374, "No": 1.2018625337476055e-05}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999753699393249, "res": {"Yes": 0.9999753699393249, "No": 2.4575513424568618e-05}, "ground_truth": 1}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998235307425315, "res": {"Yes": 0.9998235307425315, "No": 0.00017644400218583633}, "ground_truth": 0}, {"key": "daacc676fc659bfd147d11b1409ab42615a2f169", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999884821053314, "res": {"Yes": 0.9999884821053314, "No": 1.1423601907577447e-05}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9980703707329971, "res": {"Yes": 0.9980703707329971, "No": 0.0019296415860901622}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999131909308925, "res": {"Yes": 0.999131909308925, "No": 0.0008680133180899777}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.990339976697441, "res": {"Yes": 0.990339976697441, "No": 0.009659928813381777}, "ground_truth": 1}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989025413487074, "res": {"Yes": 0.9989025413487074, "No": 0.0010974126567596437}, "ground_truth": 0}, {"key": "7a02e707b139791920fb13115d6132541fff7d97", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993466315381913, "res": {"Yes": 0.9993466315381913, "No": 0.0006532597684884274}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9978335331842428, "res": {"Yes": 0.9978335331842428, "No": 0.0021664023294148765}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974568269578917, "res": {"Yes": 0.9974568269578917, "No": 0.0025431304402799763}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977383520883393, "res": {"Yes": 0.9977383520883393, "No": 0.0022615964981071066}, "ground_truth": 1}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984791547590192, "res": {"Yes": 0.9984791547590192, "No": 0.0015207722711871087}, "ground_truth": 0}, {"key": "db1972f0204c7a9c292f4c85e8838251cb26625e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997254702601872, "res": {"Yes": 0.9997254702601872, "No": 0.0002744951405735456}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9843635970346832, "res": {"Yes": 0.9843635970346832, "No": 0.01563631789955174}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912082821719345, "res": {"Yes": 0.9912082821719345, "No": 0.008791698983609292}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999837713579866, "res": {"Yes": 0.999837713579866, "No": 0.0001621797937798054}, "ground_truth": 1}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991175093241398, "res": {"Yes": 0.9991175093241398, "No": 0.0008824703212799588}, "ground_truth": 0}, {"key": "6defcdfd6995661fd681dc15163d67f6a5d6727b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995338085810045, "res": {"Yes": 0.9995338085810045, "No": 0.000466089911181527}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9455625411103005, "res": {"Yes": 0.9455625411103005, "No": 0.05443719103373684}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946941559793702, "res": {"Yes": 0.9946941559793702, "No": 0.005305819722515159}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9853511429825665, "res": {"Yes": 0.9853511429825665, "No": 0.014648836008798172}, "ground_truth": 1}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995740631068848, "res": {"Yes": 0.9995740631068848, "No": 0.00042584505436744246}, "ground_truth": 0}, {"key": "24716f5a39d23dafbe9a9df8fa002788d14d020b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992740115072616, "res": {"Yes": 0.9992740115072616, "No": 0.0007259807925733465}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9987689815256479, "res": {"Yes": 0.9987689815256479, "No": 0.0012309960999271371}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992828225547686, "res": {"Yes": 0.9992828225547686, "No": 0.0007171706787619053}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999839858893956, "res": {"Yes": 0.999839858893956, "No": 0.00016006637259969675}, "ground_truth": 1}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993846114292656, "res": {"Yes": 0.9993846114292656, "No": 0.000615359258838623}, "ground_truth": 0}, {"key": "ab3c98cf7833b842898a568d872e45ceb2f7dda9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994889200458753, "res": {"Yes": 0.9994889200458753, "No": 0.00051106692890832}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9992481890116072, "res": {"Yes": 0.9992481890116072, "No": 0.000751814981143212}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996972344427446, "res": {"Yes": 0.9996972344427446, "No": 0.00030265439923935655}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996617341547962, "res": {"Yes": 0.9996617341547962, "No": 0.00033818820429693994}, "ground_truth": 1}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997566788773872, "res": {"Yes": 0.9997566788773872, "No": 0.00024322928495756863}, "ground_truth": 0}, {"key": "831005755f85012c882f17c3b3699b34a7febb7b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989825926102119, "res": {"Yes": 0.9989825926102119, "No": 0.001017332778101474}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9987328328351924, "res": {"Yes": 0.9987328328351924, "No": 0.0012671757167575636}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9858087497145953, "res": {"Yes": 0.9858087497145953, "No": 0.014191140748877078}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974441338020588, "res": {"Yes": 0.9974441338020588, "No": 0.0025558671999952763}, "ground_truth": 1}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992126002619988, "res": {"Yes": 0.9992126002619988, "No": 0.0007873623470028816}, "ground_truth": 0}, {"key": "0590a8c653b7d332d9df195dcf3ce014abb4eff7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991491606692299, "res": {"Yes": 0.9991491606692299, "No": 0.000850794448240287}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9956415928440108, "res": {"Yes": 0.9956415928440108, "No": 0.0043583850134456805}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991266763093062, "res": {"Yes": 0.9991266763093062, "No": 0.0008732261381387037}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995046360732434, "res": {"Yes": 0.9995046360732434, "No": 0.0004953564269826755}, "ground_truth": 1}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999527020789949, "res": {"Yes": 0.999527020789949, "No": 0.0004728796696689214}, "ground_truth": 0}, {"key": "f79341438eed40986c3219b67a3b5d87eb6c018a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993947265519222, "res": {"Yes": 0.9993947265519222, "No": 0.0006052521559927707}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9560120226653813, "res": {"Yes": 0.9560120226653813, "No": 0.04398783326527336}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934947632656302, "res": {"Yes": 0.9934947632656302, "No": 0.006505209249815386}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988758949799252, "res": {"Yes": 0.9988758949799252, "No": 0.0011240031781264495}, "ground_truth": 1}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970860924937339, "res": {"Yes": 0.9970860924937339, "No": 0.002913906378207372}, "ground_truth": 0}, {"key": "8db6225f67580d64a196f2f3197386a6be950a09", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9705391220055725, "res": {"Yes": 0.9705391220055725, "No": 0.029460744403142586}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9997120076777526, "res": {"Yes": 0.9997120076777526, "No": 0.00028797755928888433}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981649062473149, "res": {"Yes": 0.9981649062473149, "No": 0.0018351123008039604}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997394133585984, "res": {"Yes": 0.9997394133585984, "No": 0.0002605249302016578}, "ground_truth": 1}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999038520625092, "res": {"Yes": 0.9999038520625092, "No": 9.601117183365894e-05}, "ground_truth": 0}, {"key": "e386d9ed70bac668fa6048b572f445d9b7b62f75", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997669279355147, "res": {"Yes": 0.9997669279355147, "No": 0.0002329993253258443}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9870483736098976, "res": {"Yes": 0.9870483736098976, "No": 0.012951495692701586}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.997913897941756, "res": {"Yes": 0.997913897941756, "No": 0.0020860659653768043}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9940989947308381, "res": {"Yes": 0.9940989947308381, "No": 0.005900945421355231}, "ground_truth": 1}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998217411102249, "res": {"Yes": 0.998217411102249, "No": 0.0017825761244926166}, "ground_truth": 0}, {"key": "e35669d4530ea55bc8f0e451268cab9e9eaeab92", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9940936942091294, "res": {"Yes": 0.9940936942091294, "No": 0.005906250984703199}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9924702552413628, "res": {"Yes": 0.9924702552413628, "No": 0.0075297126393759}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.981931926807932, "res": {"Yes": 0.981931926807932, "No": 0.018068051696050892}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995822846973398, "res": {"Yes": 0.9995822846973398, "No": 0.00041769460604964767}, "ground_truth": 1}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99960384079179, "res": {"Yes": 0.99960384079179, "No": 0.00039607776093087993}, "ground_truth": 0}, {"key": "d3ca193f45aef9aa766d52fbc58d6b8f1658ee60", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9959506000210685, "res": {"Yes": 0.9959506000210685, "No": 0.004049424527236864}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9941077120231437, "res": {"Yes": 0.9941077120231437, "No": 0.005892267976503079}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998338996861849, "res": {"Yes": 0.9998338996861849, "No": 0.00016598784419997588}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991549931195635, "res": {"Yes": 0.9991549931195635, "No": 0.0008449900549215237}, "ground_truth": 1}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996939014577211, "res": {"Yes": 0.9996939014577211, "No": 0.0003060402281836997}, "ground_truth": 0}, {"key": "1ca9bf09bf534add02b7cfe6ebae6d17cde66286", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999475965531086, "res": {"Yes": 0.9999475965531086, "No": 5.227339584337038e-05}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.998605619553058, "res": {"Yes": 0.998605619553058, "No": 0.001394374989929095}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9885106623856977, "res": {"Yes": 0.9885106623856977, "No": 0.011489243834908692}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933199454266782, "res": {"Yes": 0.9933199454266782, "No": 0.006680055387005951}, "ground_truth": 1}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993305570270018, "res": {"Yes": 0.9993305570270018, "No": 0.0006694223377065031}, "ground_truth": 0}, {"key": "c296e88117bef515bd9b36ad560223df3fd2b84c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9950000973344313, "res": {"Yes": 0.9950000973344313, "No": 0.0049998683344737935}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9969036185078102, "res": {"Yes": 0.9969036185078102, "No": 0.0030963282653979924}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998601168145652, "res": {"Yes": 0.9998601168145652, "No": 0.00013979646704204846}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983737522762163, "res": {"Yes": 0.9983737522762163, "No": 0.0016261659708046961}, "ground_truth": 1}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994878477947371, "res": {"Yes": 0.9994878477947371, "No": 0.0005121437042206052}, "ground_truth": 0}, {"key": "c5e11ee4c2f0ae511bd0e1fae2978d070b1efc15", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986981102946529, "res": {"Yes": 0.9986981102946529, "No": 0.0013018206425797322}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9989175226018027, "res": {"Yes": 0.9989175226018027, "No": 0.001082432993253803}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995238038573905, "res": {"Yes": 0.9995238038573905, "No": 0.00047607078823994607}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983827823081304, "res": {"Yes": 0.9983827823081304, "No": 0.00161714262308152}, "ground_truth": 1}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993412746148914, "res": {"Yes": 0.9993412746148914, "No": 0.000658716574627148}, "ground_truth": 0}, {"key": "1fc2d1ecf1ffdc2f0b642b686e1c21de53a4fd28", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9955083137318507, "res": {"Yes": 0.9955083137318507, "No": 0.004491641741827033}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9988843379142214, "res": {"Yes": 0.9988843379142214, "No": 0.0011156143937993805}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998990843823236, "res": {"Yes": 0.9998990843823236, "No": 0.00010082078733768723}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998074449011132, "res": {"Yes": 0.9998074449011132, "No": 0.00019247582433372607}, "ground_truth": 1}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999208966888319, "res": {"Yes": 0.9999208966888319, "No": 7.901712158938726e-05}, "ground_truth": 0}, {"key": "6556c64821ece8123c130b27285efe9619c3c7fd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987413937101063, "res": {"Yes": 0.9987413937101063, "No": 0.0012585487594994832}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.990897991365772, "res": {"Yes": 0.990897991365772, "No": 0.009101934551282035}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9897874983747663, "res": {"Yes": 0.9897874983747663, "No": 0.010212409301107074}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971819847801743, "res": {"Yes": 0.9971819847801743, "No": 0.002818058799608358}, "ground_truth": 1}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989246597931071, "res": {"Yes": 0.9989246597931071, "No": 0.0010752425660702338}, "ground_truth": 0}, {"key": "32ba61f4513f7ec21cedb6ddd2a8a93260f8cd56", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958115594868158, "res": {"Yes": 0.9958115594868158, "No": 0.004188393631756114}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9981006545438107, "res": {"Yes": 0.9981006545438107, "No": 0.0018993193382887852}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996710252840525, "res": {"Yes": 0.9996710252840525, "No": 0.00032895124818086243}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998802522853301, "res": {"Yes": 0.9998802522853301, "No": 0.00011964279054013367}, "ground_truth": 1}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965795854907643, "res": {"Yes": 0.9965795854907643, "No": 0.003420450539117707}, "ground_truth": 0}, {"key": "1146cfc5981ce23511ac1bac5f3957c30bebc7b8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973257834048402, "res": {"Yes": 0.9973257834048402, "No": 0.0026742468223992957}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9977738368847335, "res": {"Yes": 0.9977738368847335, "No": 0.0022261195975754014}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993860409500361, "res": {"Yes": 0.9993860409500361, "No": 0.0006138831238329406}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975374887675206, "res": {"Yes": 0.9975374887675206, "No": 0.0024625103053822137}, "ground_truth": 1}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992910343949607, "res": {"Yes": 0.9992910343949607, "No": 0.0007088943891519357}, "ground_truth": 0}, {"key": "e060abdb2a5e2ce6152ca0ed53703a9d17b2df36", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998974863311998, "res": {"Yes": 0.998974863311998, "No": 0.0010250888369010907}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9947572653573367, "res": {"Yes": 0.9947572653573367, "No": 0.005242696572209368}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994489072574181, "res": {"Yes": 0.9994489072574181, "No": 0.000551076290576107}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9934731147538581, "res": {"Yes": 0.9934731147538581, "No": 0.006526890734555342}, "ground_truth": 1}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9805913803334476, "res": {"Yes": 0.9805913803334476, "No": 0.019408600236236476}, "ground_truth": 0}, {"key": "aef2a80ff67e87a8c165a481ede2a643128f4aa7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996891347886059, "res": {"Yes": 0.9996891347886059, "No": 0.0003107928328701585}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9962699557625339, "res": {"Yes": 0.9962699557625339, "No": 0.00372997778913365}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978855226634955, "res": {"Yes": 0.9978855226634955, "No": 0.0021144067548299074}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981203683243325, "res": {"Yes": 0.9981203683243325, "No": 0.0018796362592163908}, "ground_truth": 1}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995378596197773, "res": {"Yes": 0.9995378596197773, "No": 0.00046203093973568867}, "ground_truth": 0}, {"key": "d4eaedd7f1055f95e9cafa21e870d049f5df8526", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9960269893736179, "res": {"Yes": 0.9960269893736179, "No": 0.00397297934899433}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9947842740800724, "res": {"Yes": 0.9947842740800724, "No": 0.00521571028197657}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955935084987928, "res": {"Yes": 0.9955935084987928, "No": 0.004406451772992232}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992778195379277, "res": {"Yes": 0.9992778195379277, "No": 0.0007221252194269901}, "ground_truth": 1}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989714098618648, "res": {"Yes": 0.9989714098618648, "No": 0.0010284967590691997}, "ground_truth": 0}, {"key": "ec9531cd58e385f0da60e23b2855a3c5ece3f270", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999015906837589, "res": {"Yes": 0.999015906837589, "No": 0.0009840551654837274}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9806964994798413, "res": {"Yes": 0.9806964994798413, "No": 0.019303537687465247}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995321405203953, "res": {"Yes": 0.9995321405203953, "No": 0.0004678045394663056}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993231787069315, "res": {"Yes": 0.9993231787069315, "No": 0.0006767709318424338}, "ground_truth": 1}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995700192682222, "res": {"Yes": 0.9995700192682222, "No": 0.0004298565541207078}, "ground_truth": 0}, {"key": "b171bad51c8ec09118fd553347c41d9301f195ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994921333079695, "res": {"Yes": 0.9994921333079695, "No": 0.0005077593211081002}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9929548562046011, "res": {"Yes": 0.9929548562046011, "No": 0.007045073505317333}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966566616441809, "res": {"Yes": 0.9966566616441809, "No": 0.0033433231885934985}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998673872338504, "res": {"Yes": 0.9998673872338504, "No": 0.00013255869890956652}, "ground_truth": 1}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999647611309035, "res": {"Yes": 0.9999647611309035, "No": 3.514943548525527e-05}, "ground_truth": 0}, {"key": "1df14bd442bd39ed653aa77a1e77105d4b321db0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997733634559426, "res": {"Yes": 0.9997733634559426, "No": 0.0002265730836420626}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9993402024921669, "res": {"Yes": 0.9993402024921669, "No": 0.0006597947870528228}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996631641019533, "res": {"Yes": 0.9996631641019533, "No": 0.0003368071656969941}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998726315120546, "res": {"Yes": 0.9998726315120546, "No": 0.00012731388312761676}, "ground_truth": 1}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995888345316984, "res": {"Yes": 0.9995888345316984, "No": 0.0004110711270192197}, "ground_truth": 0}, {"key": "d4f57e3c18c38696345fb7a3d76a151bb9c5123b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991467786605043, "res": {"Yes": 0.9991467786605043, "No": 0.0008531387180113188}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9980078053592545, "res": {"Yes": 0.9980078053592545, "No": 0.001992221362979582}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9951347772241929, "res": {"Yes": 0.9951347772241929, "No": 0.004865275188452098}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999621949079265, "res": {"Yes": 0.999621949079265, "No": 0.000377969898164927}, "ground_truth": 1}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987985869751866, "res": {"Yes": 0.9987985869751866, "No": 0.0012013464614605896}, "ground_truth": 0}, {"key": "28bbbf57c580be154f0602cbfe8b63603ef5a3a4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9960101955041312, "res": {"Yes": 0.9960101955041312, "No": 0.003989787530497912}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9986935900973654, "res": {"Yes": 0.9986935900973654, "No": 0.0013063264730539992}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991615363572497, "res": {"Yes": 0.9991615363572497, "No": 0.0008384069121313544}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923190381149731, "res": {"Yes": 0.9923190381149731, "No": 0.0076808927122770125}, "ground_truth": 1}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995837145508416, "res": {"Yes": 0.9995837145508416, "No": 0.00041617155120710677}, "ground_truth": 0}, {"key": "05850904cabf491c9ab7c0e0d85fd932eb8c0bb0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993490140833357, "res": {"Yes": 0.9993490140833357, "No": 0.0006509826938870001}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.996827923897988, "res": {"Yes": 0.996827923897988, "No": 0.0031721036474579895}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998050612730233, "res": {"Yes": 0.9998050612730233, "No": 0.00019481269489767577}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997521612171065, "res": {"Yes": 0.9997521612171065, "No": 0.00024781451872232856}, "ground_truth": 1}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982706290881381, "res": {"Yes": 0.9982706290881381, "No": 0.001729354087526238}, "ground_truth": 0}, {"key": "df79c0f955e4ccf51ad45780f865e334d3577bf5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984616805282405, "res": {"Yes": 0.9984616805282405, "No": 0.0015383251797197468}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9976016602235727, "res": {"Yes": 0.9976016602235727, "No": 0.0023983089359581058}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986913319538417, "res": {"Yes": 0.9986913319538417, "No": 0.001308653150560134}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999108844260287, "res": {"Yes": 0.9999108844260287, "No": 8.898123973017223e-05}, "ground_truth": 1}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992997227486438, "res": {"Yes": 0.9992997227486438, "No": 0.0007001660757423164}, "ground_truth": 0}, {"key": "ffb8b9be838da033f6f3383f324d425556723147", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998036311128943, "res": {"Yes": 0.9998036311128943, "No": 0.00019627202074082447}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9806883636554291, "res": {"Yes": 0.9806883636554291, "No": 0.019311625633304505}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973708508824438, "res": {"Yes": 0.9973708508824438, "No": 0.0026291123480737004}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979503371336442, "res": {"Yes": 0.9979503371336442, "No": 0.0020496504757554963}, "ground_truth": 1}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982859540575031, "res": {"Yes": 0.9982859540575031, "No": 0.0017140578635711223}, "ground_truth": 0}, {"key": "d70dcffc2c07e08cc121d7126ada53dbc479626e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9109002069850811, "res": {"Yes": 0.9109002069850811, "No": 0.0890994248172853}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9989561823570319, "res": {"Yes": 0.9989561823570319, "No": 0.0010437492503781933}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974418895552825, "res": {"Yes": 0.9974418895552825, "No": 0.0025581140275030088}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997696689802632, "res": {"Yes": 0.9997696689802632, "No": 0.00023019725181433289}, "ground_truth": 1}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995095209541931, "res": {"Yes": 0.9995095209541931, "No": 0.0004904775743419852}, "ground_truth": 0}, {"key": "4dc0cef39980df6382f9aee5e150e904fe62965e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997821825756604, "res": {"Yes": 0.9997821825756604, "No": 0.0002177101428191388}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9962962295409952, "res": {"Yes": 0.9962962295409952, "No": 0.0037038165165268694}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993880662079002, "res": {"Yes": 0.9993880662079002, "No": 0.0006118221024374395}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988745852547103, "res": {"Yes": 0.9988745852547103, "No": 0.0011253704597238223}, "ground_truth": 1}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994823672680727, "res": {"Yes": 0.9994823672680727, "No": 0.0005175811155933802}, "ground_truth": 0}, {"key": "6cb082c504e00c93686ff1e9287a84ba13c12b79", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998446263017435, "res": {"Yes": 0.9998446263017435, "No": 0.000155346104201002}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9833274267788477, "res": {"Yes": 0.9833274267788477, "No": 0.016672543739657757}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994527160145436, "res": {"Yes": 0.9994527160145436, "No": 0.0005472552106992292}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999123106895666, "res": {"Yes": 0.999123106895666, "No": 0.0008768514186120192}, "ground_truth": 1}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990232829988661, "res": {"Yes": 0.9990232829988661, "No": 0.0009767136890453695}, "ground_truth": 0}, {"key": "19fe2a8e0bef37b495afcc83c2f46b60c56039bf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988783916733449, "res": {"Yes": 0.9988783916733449, "No": 0.0011216029629166303}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9952436325120967, "res": {"Yes": 0.9952436325120967, "No": 0.004756336261817269}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992399739262403, "res": {"Yes": 0.9992399739262403, "No": 0.0007599168793055207}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985984809464417, "res": {"Yes": 0.9985984809464417, "No": 0.0014014950770428886}, "ground_truth": 1}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973059954608156, "res": {"Yes": 0.9973059954608156, "No": 0.0026939862633127637}, "ground_truth": 0}, {"key": "503c6aaeaf0e61372a896ebe1fdf2a2492844fe4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.99929603739802, "res": {"Yes": 0.99929603739802, "No": 0.0007038938460756984}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9822600696302841, "res": {"Yes": 0.9822600696302841, "No": 0.01773993439938777}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992941315424348, "res": {"Yes": 0.9992941315424348, "No": 0.000705790787057297}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9861195611085168, "res": {"Yes": 0.9861195611085168, "No": 0.013880373708811107}, "ground_truth": 1}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993828244812086, "res": {"Yes": 0.9993828244812086, "No": 0.0006171470079247848}, "ground_truth": 0}, {"key": "e4007135fdafe84907e2b018da3a4c7ab853c251", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990922863914691, "res": {"Yes": 0.9990922863914691, "No": 0.0009076688032789594}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9881302598215561, "res": {"Yes": 0.9881302598215561, "No": 0.011869656610146948}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993021051419807, "res": {"Yes": 0.9993021051419807, "No": 0.0006978160699688935}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9883598489115539, "res": {"Yes": 0.9883598489115539, "No": 0.01164003069098222}, "ground_truth": 1}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9768298829040246, "res": {"Yes": 0.9768298829040246, "No": 0.02317006813236221}, "ground_truth": 0}, {"key": "5b97a507eed113ce329e8c85078fb53033bbe567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9893304863952002, "res": {"Yes": 0.9893304863952002, "No": 0.010669447454149909}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9909512525732653, "res": {"Yes": 0.9909512525732653, "No": 0.009048624697007283}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999404372656435, "res": {"Yes": 0.999404372656435, "No": 0.0005955268988475861}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972380574010404, "res": {"Yes": 0.9972380574010404, "No": 0.0027619370933906757}, "ground_truth": 1}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985807739551682, "res": {"Yes": 0.9985807739551682, "No": 0.0014191757977499626}, "ground_truth": 0}, {"key": "145a118e355e3a92383b41307a5b473a7c77a8bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998416162696251, "res": {"Yes": 0.998416162696251, "No": 0.0015837649290187984}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9924323231576234, "res": {"Yes": 0.9924323231576234, "No": 0.007567637102157062}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981620578880024, "res": {"Yes": 0.9981620578880024, "No": 0.0018378742024845543}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987469818835858, "res": {"Yes": 0.9987469818835858, "No": 0.0012529275825372361}, "ground_truth": 1}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978988161838195, "res": {"Yes": 0.9978988161838195, "No": 0.002101192370313895}, "ground_truth": 0}, {"key": "1f6309c88c5711b2b579e3c0cf699c44d3b56d16", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990815788670953, "res": {"Yes": 0.9990815788670953, "No": 0.0009183650393806538}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9967415708405218, "res": {"Yes": 0.9967415708405218, "No": 0.0032584368254395175}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9687165150709678, "res": {"Yes": 0.9687165150709678, "No": 0.03128340230120065}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999428780448043, "res": {"Yes": 0.999428780448043, "No": 0.0005711686687908138}, "ground_truth": 1}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969641587138849, "res": {"Yes": 0.9969641587138849, "No": 0.0030358373338043275}, "ground_truth": 0}, {"key": "d8304c60e76a4b16053e74c50abd886dcdf7397c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988370039047572, "res": {"Yes": 0.9988370039047572, "No": 0.0011629694198352918}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9705170036710213, "res": {"Yes": 0.9705170036710213, "No": 0.029482898370235777}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986302239008594, "res": {"Yes": 0.9986302239008594, "No": 0.0013696788262774861}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983259809356033, "res": {"Yes": 0.9983259809356033, "No": 0.0016739397486597873}, "ground_truth": 1}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995303532785633, "res": {"Yes": 0.9995303532785633, "No": 0.0004695630389805322}, "ground_truth": 0}, {"key": "d47e5724a866cf365428dbded7740555e5f80f2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993518731250197, "res": {"Yes": 0.9993518731250197, "No": 0.0006480705461643908}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9854834480647168, "res": {"Yes": 0.9854834480647168, "No": 0.014516505015072734}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999293595151566, "res": {"Yes": 0.9999293595151566, "No": 7.054956821171887e-05}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970747261758998, "res": {"Yes": 0.9970747261758998, "No": 0.002925266474660088}, "ground_truth": 1}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995590572738309, "res": {"Yes": 0.9995590572738309, "No": 0.0004408603937922351}, "ground_truth": 0}, {"key": "0a078513cee66db2ccaaca1b7b0755c96f0c5c8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998043462027011, "res": {"Yes": 0.9998043462027011, "No": 0.00019556084487066402}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9949452159361697, "res": {"Yes": 0.9949452159361697, "No": 0.0050547645764888365}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9947372092635387, "res": {"Yes": 0.9947372092635387, "No": 0.005262768815226744}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985025693697336, "res": {"Yes": 0.9985025693697336, "No": 0.0014973785995632245}, "ground_truth": 1}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9843063028346035, "res": {"Yes": 0.9843063028346035, "No": 0.015693673651008152}, "ground_truth": 0}, {"key": "f0d93bdd383762936746b48f7e67ffe72cfb1ede", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980872358693834, "res": {"Yes": 0.9980872358693834, "No": 0.001912686845580638}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9637636555712995, "res": {"Yes": 0.9637636555712995, "No": 0.036236031857802775}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977660016968883, "res": {"Yes": 0.9977660016968883, "No": 0.0022340057600081023}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998996514328831, "res": {"Yes": 0.998996514328831, "No": 0.0010034251313849477}, "ground_truth": 1}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997971954076322, "res": {"Yes": 0.9997971954076322, "No": 0.0002027670043354384}, "ground_truth": 0}, {"key": "38649fdccec39afa649297349b2ad2e5e1a39d82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9845106512675784, "res": {"Yes": 0.9845106512675784, "No": 0.015489157747230692}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9763972557958538, "res": {"Yes": 0.9763972557958538, "No": 0.023602633126915754}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9852260359640863, "res": {"Yes": 0.9852260359640863, "No": 0.014773911354427923}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999705695416093, "res": {"Yes": 0.999705695416093, "No": 0.00029427113138791154}, "ground_truth": 1}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999362728606604, "res": {"Yes": 0.9999362728606604, "No": 6.369439435276315e-05}, "ground_truth": 0}, {"key": "d86d9a4e0b39556578f24ff54ed82716f0375fdb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976331008336496, "res": {"Yes": 0.9976331008336496, "No": 0.0023669184545865285}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9933225360084741, "res": {"Yes": 0.9933225360084741, "No": 0.006677397253965488}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996748385564448, "res": {"Yes": 0.9996748385564448, "No": 0.00032513242405255913}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986679090218918, "res": {"Yes": 0.9986679090218918, "No": 0.001332006935560824}, "ground_truth": 1}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.987570939814732, "res": {"Yes": 0.987570939814732, "No": 0.012428980451917376}, "ground_truth": 0}, {"key": "d10d57811499caa54276d3e14f89c45cb1a8958f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989928264401058, "res": {"Yes": 0.9989928264401058, "No": 0.0010070839523017355}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.999082055269269, "res": {"Yes": 0.999082055269269, "No": 0.0009178821063924503}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999577283874698, "res": {"Yes": 0.9999577283874698, "No": 4.217765413653322e-05}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991874866678676, "res": {"Yes": 0.9991874866678676, "No": 0.0008124097718156042}, "ground_truth": 1}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9949161865446224, "res": {"Yes": 0.9949161865446224, "No": 0.005083855238490667}, "ground_truth": 0}, {"key": "66646c49afb65308491ec8ca6db79aa76263cc69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999745845032847, "res": {"Yes": 0.999745845032847, "No": 0.0002540935631283862}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998437919917416, "res": {"Yes": 0.9998437919917416, "No": 0.0001560826317884294}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999719131244437, "res": {"Yes": 0.9999719131244437, "No": 2.7956606997029234e-05}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999224462174501, "res": {"Yes": 0.9999224462174501, "No": 7.744821798130433e-05}, "ground_truth": 1}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999753699393249, "res": {"Yes": 0.9999753699393249, "No": 2.4512680570569933e-05}, "ground_truth": 0}, {"key": "7fbd6d867def06f6ae4c051562dc36ab8d0038c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998497475990228, "res": {"Yes": 0.9998497475990228, "No": 0.00015023407202280147}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9871104016785968, "res": {"Yes": 0.9871104016785968, "No": 0.012889516144661653}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9943952495493831, "res": {"Yes": 0.9943952495493831, "No": 0.005604750708921134}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986052624517523, "res": {"Yes": 0.9986052624517523, "No": 0.0013946724418365643}, "ground_truth": 1}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991107317036185, "res": {"Yes": 0.9991107317036185, "No": 0.0008892736962811476}, "ground_truth": 0}, {"key": "5c243b51bf4de7f6974338d71ac26317254007bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967919112489708, "res": {"Yes": 0.9967919112489708, "No": 0.0032081061511972106}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9962871138707158, "res": {"Yes": 0.9962871138707158, "No": 0.0037128503831763662}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9964980159996601, "res": {"Yes": 0.9964980159996601, "No": 0.003501972142115708}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945020577102544, "res": {"Yes": 0.9945020577102544, "No": 0.005497962554563996}, "ground_truth": 1}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999879179579375, "res": {"Yes": 0.999879179579375, "No": 0.00012072190057084577}, "ground_truth": 0}, {"key": "56b79c03a77d5261382e4705f3537396473f55cf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997278536785302, "res": {"Yes": 0.9997278536785302, "No": 0.00027210432973728523}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9980075672346205, "res": {"Yes": 0.9980075672346205, "No": 0.0019923648080816788}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.998456573709343, "res": {"Yes": 0.998456573709343, "No": 0.0015433729968184113}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957691210932578, "res": {"Yes": 0.9957691210932578, "No": 0.004230911603673662}, "ground_truth": 1}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995990818891528, "res": {"Yes": 0.9995990818891528, "No": 0.00040080441210828716}, "ground_truth": 0}, {"key": "7382d171d71d3219c5ad0a13ea0ac92ac6b6998b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991896306267579, "res": {"Yes": 0.9991896306267579, "No": 0.0008103226956034345}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9981898458088714, "res": {"Yes": 0.9981898458088714, "No": 0.001810155546688224}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9676553969240145, "res": {"Yes": 0.9676553969240145, "No": 0.03234456753362192}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970836002801604, "res": {"Yes": 0.9970836002801604, "No": 0.002916323883046651}, "ground_truth": 1}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948449209623623, "res": {"Yes": 0.9948449209623623, "No": 0.00515508331327385}, "ground_truth": 0}, {"key": "ef1a5fa8dbba16e4d7ef3fc8cd540c2f55691fc8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.979339745947146, "res": {"Yes": 0.979339745947146, "No": 0.020660285425876338}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.989714743749392, "res": {"Yes": 0.989714743749392, "No": 0.010285156907894053}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999160097413793, "res": {"Yes": 0.9999160097413793, "No": 8.387868745020441e-05}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994789157815629, "res": {"Yes": 0.9994789157815629, "No": 0.0005210482031756357}, "ground_truth": 1}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998539191008537, "res": {"Yes": 0.9998539191008537, "No": 0.00014599458879364723}, "ground_truth": 0}, {"key": "1ba0f66a7f4f23171634a3455893f4d5b8b94db4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997850428466002, "res": {"Yes": 0.9997850428466002, "No": 0.00021491090737926327}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9927817521976597, "res": {"Yes": 0.9927817521976597, "No": 0.007218175924736674}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993485375938393, "res": {"Yes": 0.9993485375938393, "No": 0.000651399781860599}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972292825425316, "res": {"Yes": 0.9972292825425316, "No": 0.0027707240584439992}, "ground_truth": 1}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985716157133125, "res": {"Yes": 0.9985716157133125, "No": 0.00142831335908905}, "ground_truth": 0}, {"key": "3131e8ba238a31d83cad00bc7b02933b0a3587b3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984433808902581, "res": {"Yes": 0.9984433808902581, "No": 0.001556591163890205}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9982472271037397, "res": {"Yes": 0.9982472271037397, "No": 0.0017527110821558276}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979372773424222, "res": {"Yes": 0.9979372773424222, "No": 0.0020626995353969376}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999169632877386, "res": {"Yes": 0.9999169632877386, "No": 8.29142182228769e-05}, "ground_truth": 1}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994052065597914, "res": {"Yes": 0.9994052065597914, "No": 0.0005947460798855422}, "ground_truth": 0}, {"key": "112bc4ab4b6e7d6dd35fc33cd946fbfc4c9356d6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999820452021894, "res": {"Yes": 0.9999820452021894, "No": 1.782540270095809e-05}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9997504927921019, "res": {"Yes": 0.9997504927921019, "No": 0.00024947022214892524}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996338649837903, "res": {"Yes": 0.9996338649837903, "No": 0.00036604510296886413}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988643599278064, "res": {"Yes": 0.9988643599278064, "No": 0.0011356442135905002}, "ground_truth": 1}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998777566423283, "res": {"Yes": 0.9998777566423283, "No": 0.00012213030615358105}, "ground_truth": 0}, {"key": "3b1d3309825780556178569cd40d880a793f8da7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994490263417625, "res": {"Yes": 0.9994490263417625, "No": 0.0005509564445166381}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9881024413819395, "res": {"Yes": 0.9881024413819395, "No": 0.011897471267203476}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999577283874698, "res": {"Yes": 0.9999577283874698, "No": 4.2159479484519195e-05}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998240098346271, "res": {"Yes": 0.998240098346271, "No": 0.0017598202602590273}, "ground_truth": 1}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996716210981578, "res": {"Yes": 0.9996716210981578, "No": 0.0003283398033565443}, "ground_truth": 0}, {"key": "c977693b405a89cec98e53b05199e608fd6adeca", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998990843823236, "res": {"Yes": 0.9998990843823236, "No": 0.00010082008159464596}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.99643682104222, "res": {"Yes": 0.99643682104222, "No": 0.0035632294373600408}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957285832613173, "res": {"Yes": 0.9957285832613173, "No": 0.004271459956466762}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952722043764174, "res": {"Yes": 0.9952722043764174, "No": 0.004727730506377584}, "ground_truth": 1}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986000283758467, "res": {"Yes": 0.9986000283758467, "No": 0.0013999580599335097}, "ground_truth": 0}, {"key": "c84e272a80fa95f863e016bc54df0841fd83fc12", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963770514725258, "res": {"Yes": 0.9963770514725258, "No": 0.0036229100314087957}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.979144151181748, "res": {"Yes": 0.979144151181748, "No": 0.020855841544033082}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995350000659958, "res": {"Yes": 0.9995350000659958, "No": 0.0004649877020178935}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994475966809253, "res": {"Yes": 0.9994475966809253, "No": 0.0005523572107807796}, "ground_truth": 1}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998953894470656, "res": {"Yes": 0.9998953894470656, "No": 0.00010453234635058032}, "ground_truth": 0}, {"key": "f51cff1d73cd0f3954376d7b3eab1d5b2ccb354c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999582999628833, "res": {"Yes": 0.999582999628833, "No": 0.0004168942355958715}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9986468714051762, "res": {"Yes": 0.9986468714051762, "No": 0.001353088123480287}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985405923725634, "res": {"Yes": 0.9985405923725634, "No": 0.0014593478808425223}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9753885066429276, "res": {"Yes": 0.9753885066429276, "No": 0.024611388401956123}, "ground_truth": 1}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999652439463162, "res": {"Yes": 0.999652439463162, "No": 0.00034750427182965774}, "ground_truth": 0}, {"key": "a98f1a0258f641788e570264b79e5c816f80054f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980614659888736, "res": {"Yes": 0.9980614659888736, "No": 0.0019385519541503075}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9812138444350336, "res": {"Yes": 0.9812138444350336, "No": 0.018786100832431357}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9927333417657513, "res": {"Yes": 0.9927333417657513, "No": 0.007266559831866499}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997763428849825, "res": {"Yes": 0.9997763428849825, "No": 0.00022354227766218545}, "ground_truth": 1}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992154552163187, "res": {"Yes": 0.9992154552163187, "No": 0.0007844474291147039}, "ground_truth": 0}, {"key": "d69d50059c96bd729e9e4c54eee7901d43c0181d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976829393388617, "res": {"Yes": 0.9976829393388617, "No": 0.0023170788570352923}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9638968713709531, "res": {"Yes": 0.9638968713709531, "No": 0.03610304746114503}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9909736156225147, "res": {"Yes": 0.9909736156225147, "No": 0.009026315713134368}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992728202734226, "res": {"Yes": 0.9992728202734226, "No": 0.0007271272946162314}, "ground_truth": 1}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9859351601882299, "res": {"Yes": 0.9859351601882299, "No": 0.014064730475554513}, "ground_truth": 0}, {"key": "379692c378381f092c0bae0516ea8abc5bdb98b8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980859270784491, "res": {"Yes": 0.9980859270784491, "No": 0.0019139980591119145}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9134755214183589, "res": {"Yes": 0.9134755214183589, "No": 0.0865243588054654}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.988842529478185, "res": {"Yes": 0.988842529478185, "No": 0.011157403615915737}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9799857129953121, "res": {"Yes": 0.9799857129953121, "No": 0.02001423717358713}, "ground_truth": 1}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971487938310447, "res": {"Yes": 0.9971487938310447, "No": 0.0028512365545793356}, "ground_truth": 0}, {"key": "88b0d86cd06cd05a090f6a6c3a3ed17bf5f96df3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9934407636234018, "res": {"Yes": 0.9934407636234018, "No": 0.0065592008834082435}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9614943405827563, "res": {"Yes": 0.9614943405827563, "No": 0.03850557280429102}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979834649443112, "res": {"Yes": 0.9979834649443112, "No": 0.002016471655571151}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989163317929337, "res": {"Yes": 0.9989163317929337, "No": 0.001083588023851695}, "ground_truth": 1}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985829166122335, "res": {"Yes": 0.9985829166122335, "No": 0.001417019864032911}, "ground_truth": 0}, {"key": "3b46e80db81819fdcb8b8044941f64d0fa5abd11", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997850428466002, "res": {"Yes": 0.9997850428466002, "No": 0.00021493798785962747}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9993965135712305, "res": {"Yes": 0.9993965135712305, "No": 0.0006034067769678588}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986890774107047, "res": {"Yes": 0.9986890774107047, "No": 0.0013108384609300473}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999630647657377, "res": {"Yes": 0.999630647657377, "No": 0.00036931327144231656}, "ground_truth": 1}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9923226798332935, "res": {"Yes": 0.9923226798332935, "No": 0.0076770800956057796}, "ground_truth": 0}, {"key": "786d56f09dae2902696b680395353986051a1139", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938311648779148, "res": {"Yes": 0.9938311648779148, "No": 0.006168807707584356}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9659687260337056, "res": {"Yes": 0.9659687260337056, "No": 0.03403125880895287}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962390734665906, "res": {"Yes": 0.9962390734665906, "No": 0.0037609526097952695}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9935690248070154, "res": {"Yes": 0.9935690248070154, "No": 0.006430956474042239}, "ground_truth": 1}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964576495794895, "res": {"Yes": 0.9964576495794895, "No": 0.00354231078737332}, "ground_truth": 0}, {"key": "bf1a3fd65fdd6e91ccec0663d80fa567eada14df", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956718485617401, "res": {"Yes": 0.9956718485617401, "No": 0.0043281481821681525}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9946267000984873, "res": {"Yes": 0.9946267000984873, "No": 0.005373305556344077}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965874003995409, "res": {"Yes": 0.9965874003995409, "No": 0.0034126051696530654}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897597015729864, "res": {"Yes": 0.9897597015729864, "No": 0.010240208482079018}, "ground_truth": 1}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9872550407291006, "res": {"Yes": 0.9872550407291006, "No": 0.012744913858799578}, "ground_truth": 0}, {"key": "a0bd3047a6de80e3924843f1961ce28347c6a548", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984563356773242, "res": {"Yes": 0.9984563356773242, "No": 0.0015436549968248475}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7782405051436418, "res": {"Yes": 0.7782405051436418, "No": 0.22175919345604425}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9834363628748994, "res": {"Yes": 0.9834363628748994, "No": 0.016563579500019167}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989526135924451, "res": {"Yes": 0.9989526135924451, "No": 0.0010472950307672216}, "ground_truth": 1}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9920569877685215, "res": {"Yes": 0.9920569877685215, "No": 0.007943013037103963}, "ground_truth": 0}, {"key": "090d322e93af624203dbd5dc83403170071c9e6a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9815410602090426, "res": {"Yes": 0.9815410602090426, "No": 0.018458930272286882}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9986970426869437, "res": {"Yes": 0.9986970426869437, "No": 0.0013028774987925964}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996572059673049, "res": {"Yes": 0.9996572059673049, "No": 0.00034266954529027683}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996893731045271, "res": {"Yes": 0.9996893731045271, "No": 0.00031053001332189426}, "ground_truth": 1}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987598251532275, "res": {"Yes": 0.9987598251532275, "No": 0.0012400907705349697}, "ground_truth": 0}, {"key": "ed5179817cb8f597a760800fcdce2b8e64fd30dc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.99990659348666, "res": {"Yes": 0.99990659348666, "No": 9.32756938619723e-05}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9958375650469281, "res": {"Yes": 0.9958375650469281, "No": 0.004162412686965337}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.998499712757819, "res": {"Yes": 0.998499712757819, "No": 0.0015002042702184704}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994949926891393, "res": {"Yes": 0.9994949926891393, "No": 0.0005049367924438705}, "ground_truth": 1}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998535615431577, "res": {"Yes": 0.9998535615431577, "No": 0.00014636925285161932}, "ground_truth": 0}, {"key": "571a8023d0deed8c667dd41fed88290309e56927", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988391433160271, "res": {"Yes": 0.9988391433160271, "No": 0.0011607731352133736}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9999248301053981, "res": {"Yes": 0.9999248301053981, "No": 7.509958689061955e-05}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997675238167767, "res": {"Yes": 0.9997675238167767, "No": 0.00023234505156306936}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999593971683173, "res": {"Yes": 0.9999593971683173, "No": 4.0484434138073175e-05}, "ground_truth": 1}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999490269261713, "res": {"Yes": 0.9999490269261713, "No": 5.0884866341583056e-05}, "ground_truth": 0}, {"key": "544d7c0cba7df38cc8dda8d0d91c6845cf5f52bb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999355681512362, "res": {"Yes": 0.999355681512362, "No": 0.0006442304849527591}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9945869533221016, "res": {"Yes": 0.9945869533221016, "No": 0.005413042282013713}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9675955783163889, "res": {"Yes": 0.9675955783163889, "No": 0.03240431778803644}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983075684801695, "res": {"Yes": 0.9983075684801695, "No": 0.0016924139439413585}, "ground_truth": 1}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988534130337261, "res": {"Yes": 0.9988534130337261, "No": 0.00114650935050956}, "ground_truth": 0}, {"key": "cbd3a0715a91ac8f651aa7b1f2c1fb37a2ef8a69", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9922088307335872, "res": {"Yes": 0.9922088307335872, "No": 0.007791078359415997}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9552099946382858, "res": {"Yes": 0.9552099946382858, "No": 0.04478992738485042}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.962806475885075, "res": {"Yes": 0.962806475885075, "No": 0.03719346519259474}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9892237310293098, "res": {"Yes": 0.9892237310293098, "No": 0.0107762385819241}, "ground_truth": 1}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992521198518722, "res": {"Yes": 0.9992521198518722, "No": 0.0007478702930795811}, "ground_truth": 0}, {"key": "4ca8b3cf75948dffea7b96e71bf1d9621107354b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9840346215960638, "res": {"Yes": 0.9840346215960638, "No": 0.015965315356812317}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9996095603214171, "res": {"Yes": 0.9996095603214171, "No": 0.00039034269482345627}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998934823934031, "res": {"Yes": 0.9998934823934031, "No": 0.00010643011344500245}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999574899895761, "res": {"Yes": 0.9999574899895761, "No": 4.241688686071251e-05}, "ground_truth": 1}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999927452391705, "res": {"Yes": 0.999927452391705, "No": 7.25256692606553e-05}, "ground_truth": 0}, {"key": "98f04e49605f36e0236b206fcb26a1549a7c00c6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996464850411435, "res": {"Yes": 0.9996464850411435, "No": 0.0003534837650466903}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9776333930056631, "res": {"Yes": 0.9776333930056631, "No": 0.022366537469850532}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.991491802333986, "res": {"Yes": 0.991491802333986, "No": 0.008508181916973228}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963877007077231, "res": {"Yes": 0.9963877007077231, "No": 0.003612323806573111}, "ground_truth": 1}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994704604555806, "res": {"Yes": 0.9994704604555806, "No": 0.0005294347129278914}, "ground_truth": 0}, {"key": "fb0003754a764d7f3252120de58b515d1f882556", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979187637817852, "res": {"Yes": 0.9979187637817852, "No": 0.0020811950645402105}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9984203246026996, "res": {"Yes": 0.9984203246026996, "No": 0.0015796625985007274}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999100500717374, "res": {"Yes": 0.9999100500717374, "No": 8.986059795938776e-05}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997278536785302, "res": {"Yes": 0.9997278536785302, "No": 0.00027209099695178383}, "ground_truth": 1}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999087389545426, "res": {"Yes": 0.9999087389545426, "No": 9.115232953569738e-05}, "ground_truth": 0}, {"key": "0130f10c1d700cf42cad5fd24b242667342c86be", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994411669653781, "res": {"Yes": 0.9994411669653781, "No": 0.0005588242695359403}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9995507204063162, "res": {"Yes": 0.9995507204063162, "No": 0.0004492536085255394}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996461275676244, "res": {"Yes": 0.9996461275676244, "No": 0.00035374685482514686}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991721330111232, "res": {"Yes": 0.9991721330111232, "No": 0.0008278707622504611}, "ground_truth": 1}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992688894916242, "res": {"Yes": 0.9992688894916242, "No": 0.00073108231392891}, "ground_truth": 0}, {"key": "fb8431bc320c2c1a6c32699da0fdae228ed8d73b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965568537696755, "res": {"Yes": 0.9965568537696755, "No": 0.0034431645608289903}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9292645334022371, "res": {"Yes": 0.9292645334022371, "No": 0.07073540878378531}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994278310511182, "res": {"Yes": 0.9994278310511182, "No": 0.0005721690890736293}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9920585105771665, "res": {"Yes": 0.9920585105771665, "No": 0.007941419034350939}, "ground_truth": 1}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971119265313897, "res": {"Yes": 0.9971119265313897, "No": 0.0028880764115749393}, "ground_truth": 0}, {"key": "875d564015e85f88103edf0b36ce10e292334902", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997065295708732, "res": {"Yes": 0.9997065295708732, "No": 0.0002933702778133017}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9921682254180815, "res": {"Yes": 0.9921682254180815, "No": 0.007831760138913806}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989401249647258, "res": {"Yes": 0.9989401249647258, "No": 0.001059821268179771}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875063044243987, "res": {"Yes": 0.9875063044243987, "No": 0.01249362206110139}, "ground_truth": 1}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978957344774538, "res": {"Yes": 0.9978957344774538, "No": 0.00210420132899228}, "ground_truth": 0}, {"key": "53102b470b50905c033a90940fc6fa87190f7343", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992490228246139, "res": {"Yes": 0.9992490228246139, "No": 0.0007509439821456232}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9996438634816706, "res": {"Yes": 0.9996438634816706, "No": 0.0003560271378862668}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998992035803005, "res": {"Yes": 0.9998992035803005, "No": 0.00010067943495493558}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999624963432797, "res": {"Yes": 0.9999624963432797, "No": 3.737393580887252e-05}, "ground_truth": 1}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999788267671561, "res": {"Yes": 0.9999788267671561, "No": 2.113713736265604e-05}, "ground_truth": 0}, {"key": "58776b77856719aff45f09e50d370f52c756dc10", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998981308540138, "res": {"Yes": 0.9998981308540138, "No": 0.00010182529574399366}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9364126577464926, "res": {"Yes": 0.9364126577464926, "No": 0.06358720551636643}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9908426274781177, "res": {"Yes": 0.9908426274781177, "No": 0.009157253330595512}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9940044007713514, "res": {"Yes": 0.9940044007713514, "No": 0.005995586882288078}, "ground_truth": 1}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.982626309710504, "res": {"Yes": 0.982626309710504, "No": 0.017373648060955555}, "ground_truth": 0}, {"key": "1b1479bb6e290e86e4b2a1e862e06909a0a62f8e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9660382338480713, "res": {"Yes": 0.9660382338480713, "No": 0.03396172920364714}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9990475441143268, "res": {"Yes": 0.9990475441143268, "No": 0.0009523551843024783}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999877668918251, "res": {"Yes": 0.9999877668918251, "No": 1.217910231449518e-05}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999845484373766, "res": {"Yes": 0.9999845484373766, "No": 1.532832705297498e-05}, "ground_truth": 1}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999686947220204, "res": {"Yes": 0.9999686947220204, "No": 3.1197393759103834e-05}, "ground_truth": 0}, {"key": "137019bb8262f8ab776ec6367a0ae4b020fa638e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999454510038506, "res": {"Yes": 0.9999454510038506, "No": 5.444715280691238e-05}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9987218910778332, "res": {"Yes": 0.9987218910778332, "No": 0.001278050639231476}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9920759927862752, "res": {"Yes": 0.9920759927862752, "No": 0.007923945721941475}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9818705589621729, "res": {"Yes": 0.9818705589621729, "No": 0.018129478846415317}, "ground_truth": 1}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968330153092946, "res": {"Yes": 0.9968330153092946, "No": 0.0031669335298825426}, "ground_truth": 0}, {"key": "1f738e018a3a193d04ec22612e4fe6b01b4969fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983912175800541, "res": {"Yes": 0.9983912175800541, "No": 0.0016087018975721797}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9283882322163752, "res": {"Yes": 0.9283882322163752, "No": 0.07161173656092677}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974994106952473, "res": {"Yes": 0.9974994106952473, "No": 0.0025005639313641077}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997536065482047, "res": {"Yes": 0.997536065482047, "No": 0.0024639158147161428}, "ground_truth": 1}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987756415391756, "res": {"Yes": 0.9987756415391756, "No": 0.0012243539033011648}, "ground_truth": 0}, {"key": "2bb7b7d5a229ccb9c9b5b4fa1475a54297fc0a25", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982835742464539, "res": {"Yes": 0.9982835742464539, "No": 0.001716376322162547}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5994210237839778, "res": {"Yes": 0.5994210237839778, "No": 0.4005789793671437}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999852727245697, "res": {"Yes": 0.999852727245697, "No": 0.00014719232396589804}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996968769710715, "res": {"Yes": 0.9996968769710715, "No": 0.000303077502904193}, "ground_truth": 1}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996039599145867, "res": {"Yes": 0.9996039599145867, "No": 0.0003960219179036146}, "ground_truth": 0}, {"key": "c136c209869b227e534aca60b6df7e0b26bce7fe", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997198728829297, "res": {"Yes": 0.9997198728829297, "No": 0.00028005823339735425}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.94668288556753, "res": {"Yes": 0.94668288556753, "No": 0.05331699196622318}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912982344400509, "res": {"Yes": 0.9912982344400509, "No": 0.008701637958068597}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945369480401623, "res": {"Yes": 0.9945369480401623, "No": 0.00546297044289298}, "ground_truth": 1}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995271399835534, "res": {"Yes": 0.9995271399835534, "No": 0.000472762410051637}, "ground_truth": 0}, {"key": "bfbf2be3d5b9da8425253502393528d0f95d2b5d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.99951534443347, "res": {"Yes": 0.99951534443347, "No": 0.00048457196649824666}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.954253581647446, "res": {"Yes": 0.954253581647446, "No": 0.04574628009341271}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.990665006125071, "res": {"Yes": 0.990665006125071, "No": 0.009334906669028753}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9571643439570645, "res": {"Yes": 0.9571643439570645, "No": 0.04283560627229562}, "ground_truth": 1}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992592593838862, "res": {"Yes": 0.9992592593838862, "No": 0.000740653095135573}, "ground_truth": 0}, {"key": "de149988c9768a12f4a656f1d86efe42af51b4c8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997297604212948, "res": {"Yes": 0.9997297604212948, "No": 0.0002701903087585968}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.993885807217464, "res": {"Yes": 0.993885807217464, "No": 0.006114229618721199}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9714385858142064, "res": {"Yes": 0.9714385858142064, "No": 0.028561289270646717}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997257085847705, "res": {"Yes": 0.9997257085847705, "No": 0.00027420542670094717}, "ground_truth": 1}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977058339307979, "res": {"Yes": 0.9977058339307979, "No": 0.0022940969471192444}, "ground_truth": 0}, {"key": "45986f43dd61461844801c08c8753a71ccbf9083", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990908572209484, "res": {"Yes": 0.9990908572209484, "No": 0.0009090567813555674}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9845204737793456, "res": {"Yes": 0.9845204737793456, "No": 0.015479449089305491}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9832324557025168, "res": {"Yes": 0.9832324557025168, "No": 0.016767558518515847}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9922082433461333, "res": {"Yes": 0.9922082433461333, "No": 0.007791728941620442}, "ground_truth": 1}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980759550521507, "res": {"Yes": 0.9980759550521507, "No": 0.001923978117969545}, "ground_truth": 0}, {"key": "e27256e1314f48219ceca70c273dd8429fc4ca4f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9943168641631549, "res": {"Yes": 0.9943168641631549, "No": 0.005683104662883195}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8998476119946691, "res": {"Yes": 0.8998476119946691, "No": 0.10015227512040321}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.990911812507344, "res": {"Yes": 0.990911812507344, "No": 0.009088117390484535}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.94812053991609, "res": {"Yes": 0.94812053991609, "No": 0.05187941477271924}, "ground_truth": 1}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9838563178861436, "res": {"Yes": 0.9838563178861436, "No": 0.016143683828843604}, "ground_truth": 0}, {"key": "8da9fddf6f4a0d798e454ee7b778ce0d7ce3c447", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9872961745800121, "res": {"Yes": 0.9872961745800121, "No": 0.01270374305755875}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9130298212464988, "res": {"Yes": 0.9130298212464988, "No": 0.08697012587876998}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9633765913850434, "res": {"Yes": 0.9633765913850434, "No": 0.036623286416582856}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9750153982812747, "res": {"Yes": 0.9750153982812747, "No": 0.024984541134350162}, "ground_truth": 1}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9879269600914029, "res": {"Yes": 0.9879269600914029, "No": 0.012072918201257873}, "ground_truth": 0}, {"key": "d2e357c372891291bbdb4f496792ea9d5807cd3a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977374005457267, "res": {"Yes": 0.9977374005457267, "No": 0.002262586615565727}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9500060461874131, "res": {"Yes": 0.9500060461874131, "No": 0.049993834057919226}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9917929551973212, "res": {"Yes": 0.9917929551973212, "No": 0.008206953469187662}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9943620094605591, "res": {"Yes": 0.9943620094605591, "No": 0.005637950872318284}, "ground_truth": 1}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9902658604464438, "res": {"Yes": 0.9902658604464438, "No": 0.009733997737535156}, "ground_truth": 0}, {"key": "2a17cad6a35f54ceab81d03772a75c58aa7147ef", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.996182881787928, "res": {"Yes": 0.996182881787928, "No": 0.003817036281593525}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9994491455460549, "res": {"Yes": 0.9994491455460549, "No": 0.0005508304001021634}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953086932172018, "res": {"Yes": 0.9953086932172018, "No": 0.0046913415155225655}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987413937101063, "res": {"Yes": 0.9987413937101063, "No": 0.0012585198132108978}, "ground_truth": 1}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999775155557789, "res": {"Yes": 0.9999775155557789, "No": 2.2441177621730367e-05}, "ground_truth": 0}, {"key": "4b25022fda48e7f91ab1459f1a6dca501318672e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9932264386884471, "res": {"Yes": 0.9932264386884471, "No": 0.006773465719020433}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9733879976458782, "res": {"Yes": 0.9733879976458782, "No": 0.026611846638213525}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9925532782550958, "res": {"Yes": 0.9925532782550958, "No": 0.007446732004075084}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955658549788807, "res": {"Yes": 0.9955658549788807, "No": 0.004434118246548749}, "ground_truth": 1}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963945676361415, "res": {"Yes": 0.9963945676361415, "No": 0.003605436982206423}, "ground_truth": 0}, {"key": "62066d76f4ef9e06a1129551c41371bb5d2f7465", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983650754476404, "res": {"Yes": 0.9983650754476404, "No": 0.0016348470931752698}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9999009914516699, "res": {"Yes": 0.9999009914516699, "No": 9.893851117837836e-05}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994380693821807, "res": {"Yes": 0.9994380693821807, "No": 0.0005618169347578761}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998955086436021, "res": {"Yes": 0.9998955086436021, "No": 0.00010443716074668052}, "ground_truth": 1}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999531988512079, "res": {"Yes": 0.9999531988512079, "No": 4.673271982365091e-05}, "ground_truth": 0}, {"key": "16c4a27e42066259797d93d5db0239bce0a15f18", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994411669653781, "res": {"Yes": 0.9994411669653781, "No": 0.0005587411226709131}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9981782069831238, "res": {"Yes": 0.9981782069831238, "No": 0.001821772592415829}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986943045630153, "res": {"Yes": 0.9986943045630153, "No": 0.001305699978521603}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9890498294087223, "res": {"Yes": 0.9890498294087223, "No": 0.010950110844340938}, "ground_truth": 1}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998968674781786, "res": {"Yes": 0.998968674781786, "No": 0.001031299109619538}, "ground_truth": 0}, {"key": "8626e8d0c96f4d12eca92423e0c43c6ea3e53545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996803164896663, "res": {"Yes": 0.9996803164896663, "No": 0.0003196506107723126}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.992481643307395, "res": {"Yes": 0.992481643307395, "No": 0.007518261133545313}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979372773424222, "res": {"Yes": 0.9979372773424222, "No": 0.002062738933334313}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962289082934412, "res": {"Yes": 0.9962289082934412, "No": 0.003771083191839645}, "ground_truth": 1}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9926699106136134, "res": {"Yes": 0.9926699106136134, "No": 0.007330071563751958}, "ground_truth": 0}, {"key": "f675d806e17d1920e6622d3a925367697c94e58e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9932778408772223, "res": {"Yes": 0.9932778408772223, "No": 0.00672207763739208}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9646771241526212, "res": {"Yes": 0.9646771241526212, "No": 0.0353227516498419}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9904928187460506, "res": {"Yes": 0.9904928187460506, "No": 0.009507065689844}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985907582150917, "res": {"Yes": 0.9985907582150917, "No": 0.001409236374031274}, "ground_truth": 1}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956348567540371, "res": {"Yes": 0.9956348567540371, "No": 0.004365178050630634}, "ground_truth": 0}, {"key": "835b6e4637bd2c3fe2644f2cefa00270e950ebdf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9523514425233218, "res": {"Yes": 0.9523514425233218, "No": 0.04764832031382934}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9969376224732511, "res": {"Yes": 0.9969376224732511, "No": 0.003062379655642926}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9863143429007724, "res": {"Yes": 0.9863143429007724, "No": 0.013685510959397391}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981172822407366, "res": {"Yes": 0.9981172822407366, "No": 0.0018826374242971577}, "ground_truth": 1}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9922425143146021, "res": {"Yes": 0.9922425143146021, "No": 0.007757384892289794}, "ground_truth": 0}, {"key": "aec90720358f66b9d149db8afd08fbb821e3aa52", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995988435847601, "res": {"Yes": 0.9995988435847601, "No": 0.0004011087381341943}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.993363229292999, "res": {"Yes": 0.993363229292999, "No": 0.006636734026348891}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975132899986058, "res": {"Yes": 0.9975132899986058, "No": 0.002486748114874495}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993688936714276, "res": {"Yes": 0.9993688936714276, "No": 0.0006310942755539443}, "ground_truth": 1}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978420753743339, "res": {"Yes": 0.9978420753743339, "No": 0.0021579253607023016}, "ground_truth": 0}, {"key": "0033829349643e2964a8c3ce5fd997116ce43f7c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973158335340626, "res": {"Yes": 0.9973158335340626, "No": 0.0026841539583138028}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9574028166976609, "res": {"Yes": 0.9574028166976609, "No": 0.04259710013381449}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996283836364722, "res": {"Yes": 0.9996283836364722, "No": 0.0003715324310645949}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9924889135598051, "res": {"Yes": 0.9924889135598051, "No": 0.007510979467044091}, "ground_truth": 1}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999763710200833, "res": {"Yes": 0.999763710200833, "No": 0.00023623558720289262}, "ground_truth": 0}, {"key": "3276a47234f0aa9eb7d4d06940ffb912be059454", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996964002957203, "res": {"Yes": 0.9996964002957203, "No": 0.0003035333708900317}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9985537863762859, "res": {"Yes": 0.9985537863762859, "No": 0.0014462272643425101}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9795266051461096, "res": {"Yes": 0.9795266051461096, "No": 0.02047340197311492}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980037678270443, "res": {"Yes": 0.9980037678270443, "No": 0.0019961992129220256}, "ground_truth": 1}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984962686386998, "res": {"Yes": 0.9984962686386998, "No": 0.0015037459258586301}, "ground_truth": 0}, {"key": "8d3bb51d711ce00e3fff53625a70923985573834", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9827802246267064, "res": {"Yes": 0.9827802246267064, "No": 0.01721978096269263}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.977051636732262, "res": {"Yes": 0.977051636732262, "No": 0.02294826831514977}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980897269984241, "res": {"Yes": 0.9980897269984241, "No": 0.0019102705456832496}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9787477270291246, "res": {"Yes": 0.9787477270291246, "No": 0.021252252271098664}, "ground_truth": 1}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992719864805292, "res": {"Yes": 0.9992719864805292, "No": 0.0007279957119234332}, "ground_truth": 0}, {"key": "e43fc7e34d05a75894279f55935b25040a6d7017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993827053447892, "res": {"Yes": 0.9993827053447892, "No": 0.000617196566819284}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9793541550788286, "res": {"Yes": 0.9793541550788286, "No": 0.02064577667216108}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.989099399850248, "res": {"Yes": 0.989099399850248, "No": 0.010900520920689966}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9910761807532362, "res": {"Yes": 0.9910761807532362, "No": 0.008923780493823032}, "ground_truth": 1}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9930953354925761, "res": {"Yes": 0.9930953354925761, "No": 0.006904571158412044}, "ground_truth": 0}, {"key": "518daa3f4d9f4eb8d3e6dec7852c8b645d1ceb49", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974042943848774, "res": {"Yes": 0.9974042943848774, "No": 0.00259572504660597}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9968886931779444, "res": {"Yes": 0.9968886931779444, "No": 0.0031113312180089067}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9926748473718886, "res": {"Yes": 0.9926748473718886, "No": 0.007325077821792111}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9918399821751751, "res": {"Yes": 0.9918399821751751, "No": 0.008159919298049025}, "ground_truth": 1}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982650397865331, "res": {"Yes": 0.9982650397865331, "No": 0.001734881270349694}, "ground_truth": 0}, {"key": "a86f10b66912e0221e9ad4d44c37fc7f4e138ed2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965360194715991, "res": {"Yes": 0.9965360194715991, "No": 0.003463987142632531}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9594676246504551, "res": {"Yes": 0.9594676246504551, "No": 0.04053228345732051}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991503516557392, "res": {"Yes": 0.9991503516557392, "No": 0.0008495918502312079}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993688936714276, "res": {"Yes": 0.9993688936714276, "No": 0.0006310340720318099}, "ground_truth": 1}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938381141672482, "res": {"Yes": 0.9938381141672482, "No": 0.006161843356603846}, "ground_truth": 0}, {"key": "a34829c5a83a3ef39c34b57b65cc48c9bf11f14b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980852131478408, "res": {"Yes": 0.9980852131478408, "No": 0.0019146910518334278}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.39632547450151806, "res": {"No": 0.6036743995454398, "Yes": 0.39632547450151806}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987559001347031, "res": {"Yes": 0.9987559001347031, "No": 0.0012440608140098697}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981611059413018, "res": {"Yes": 0.9981611059413018, "No": 0.001838868209771491}, "ground_truth": 1}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999692709783313, "res": {"Yes": 0.999692709783313, "No": 0.0003071722170718097}, "ground_truth": 0}, {"key": "05c6b2a21f9c0ffd5f0c76c9ce4c6d3d875fcf39", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979883316225442, "res": {"Yes": 0.9979883316225442, "No": 0.0020116715210268693}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9983386997894526, "res": {"Yes": 0.9983386997894526, "No": 0.0016612831388405646}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999706019221319, "res": {"Yes": 0.9999706019221319, "No": 2.933142767175988e-05}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9943110820285386, "res": {"Yes": 0.9943110820285386, "No": 0.0056889043869958}, "ground_truth": 1}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983534346785624, "res": {"Yes": 0.9983534346785624, "No": 0.001646537797731287}, "ground_truth": 0}, {"key": "b3e6507640b009ed51c8c8f8be85d0b7fec5b579", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984096352724, "res": {"Yes": 0.9984096352724, "No": 0.0015903534370489848}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9997537105042387, "res": {"Yes": 0.9997537105042387, "No": 0.0002462818367436981}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.7114200330795465e-06}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979693340974825, "res": {"Yes": 0.9979693340974825, "No": 0.0020306689666362296}, "ground_truth": 1}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989341747955927, "res": {"Yes": 0.9989341747955927, "No": 0.0010658400621516024}, "ground_truth": 0}, {"key": "6e930c55a62640d218e4b94f39cb7c83b6ad9117", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999431997074772, "res": {"Yes": 0.999431997074772, "No": 0.0005678984161750444}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9960395277390864, "res": {"Yes": 0.9960395277390864, "No": 0.003960441777354857}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9945512240243363, "res": {"Yes": 0.9945512240243363, "No": 0.005448792252014414}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9823554389431496, "res": {"Yes": 0.9823554389431496, "No": 0.017644525741503398}, "ground_truth": 1}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995972945376205, "res": {"Yes": 0.9995972945376205, "No": 0.0004027039022249413}, "ground_truth": 0}, {"key": "cd13c83364964da3b3659782e6ef0d2b3dc273b1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984744011109271, "res": {"Yes": 0.9984744011109271, "No": 0.0015255473733189748}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9993411554834252, "res": {"Yes": 0.9993411554834252, "No": 0.0006587617641341495}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993910335353883, "res": {"Yes": 0.9993910335353883, "No": 0.0006088509899977591}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997858770875837, "res": {"Yes": 0.9997858770875837, "No": 0.00021399476625367958}, "ground_truth": 1}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998908601961818, "res": {"Yes": 0.9998908601961818, "No": 0.00010903615865127145}, "ground_truth": 0}, {"key": "328210eaa50ae54cc3d979958c1d2008faa27401", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999576088715789, "res": {"Yes": 0.999576088715789, "No": 0.00042387667749646155}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9625386738538894, "res": {"Yes": 0.9625386738538894, "No": 0.03746117184078229}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953122380154251, "res": {"Yes": 0.9953122380154251, "No": 0.004687710415748791}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979090279342346, "res": {"Yes": 0.9979090279342346, "No": 0.002090906865872205}, "ground_truth": 1}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991486842750236, "res": {"Yes": 0.9991486842750236, "No": 0.000851284051107121}, "ground_truth": 0}, {"key": "b67b63d1da9cccd6e229d5755065d57dd1653feb", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975922913913342, "res": {"Yes": 0.9975922913913342, "No": 0.0024076647130422723}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9985650834785968, "res": {"Yes": 0.9985650834785968, "No": 0.0014348249833943682}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9734056297557612, "res": {"Yes": 0.9734056297557612, "No": 0.026593961506479337}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9810767195706441, "res": {"Yes": 0.9810767195706441, "No": 0.018923139379009413}, "ground_truth": 1}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992234282480413, "res": {"Yes": 0.9992234282480413, "No": 0.0007764809102737588}, "ground_truth": 0}, {"key": "e4c55cb4bb6e5418d8486fb7c9369b6c84b2716d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.997390056240981, "res": {"Yes": 0.997390056240981, "No": 0.002609935945246097}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.997032772945875, "res": {"Yes": 0.997032772945875, "No": 0.002967266857077205}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992054610133957, "res": {"Yes": 0.9992054610133957, "No": 0.0007945467751709317}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9925724228073562, "res": {"Yes": 0.9925724228073562, "No": 0.007427581336231178}, "ground_truth": 1}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987166601857551, "res": {"Yes": 0.9987166601857551, "No": 0.0012832517407604263}, "ground_truth": 0}, {"key": "544b165ad1d22a5dcbbab469a2aa7666c2868361", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965959135844523, "res": {"Yes": 0.9965959135844523, "No": 0.0034040138484226845}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9888871796929565, "res": {"Yes": 0.9888871796929565, "No": 0.01111266311205133}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.993975423977428, "res": {"Yes": 0.993975423977428, "No": 0.006024551151842535}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973885181669612, "res": {"Yes": 0.9973885181669612, "No": 0.0026115156953917694}, "ground_truth": 1}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989285856746271, "res": {"Yes": 0.9989285856746271, "No": 0.001071316513890848}, "ground_truth": 0}, {"key": "2577ef64a22c06cf63cb244abfad9ecc46603208", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994680776208963, "res": {"Yes": 0.9994680776208963, "No": 0.000531905189356606}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8254191426211551, "res": {"Yes": 0.8254191426211551, "No": 0.17458055685433796}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998800139029076, "res": {"Yes": 0.9998800139029076, "No": 0.00011994802907918703}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999831180165023, "res": {"Yes": 0.9999831180165023, "No": 1.6780282861589185e-05}, "ground_truth": 1}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999844292352256, "res": {"Yes": 0.9999844292352256, "No": 1.5534720607390183e-05}, "ground_truth": 0}, {"key": "2a73430dd3232ad3da7570a5ac133d7f29a6b6c6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999620195462757, "res": {"Yes": 0.9999620195462757, "No": 3.7902973215313064e-05}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9882401455218874, "res": {"Yes": 0.9882401455218874, "No": 0.011759741712213537}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9899482251988672, "res": {"Yes": 0.9899482251988672, "No": 0.010051644761568782}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9648506707058423, "res": {"Yes": 0.9648506707058423, "No": 0.03514924290265373}, "ground_truth": 1}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9914319155590693, "res": {"Yes": 0.9914319155590693, "No": 0.008568016671944978}, "ground_truth": 0}, {"key": "8fbcaf3abc124b7baaa278d382411f43e7e48353", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.5320337243576183, "res": {"Yes": 0.5320337243576183, "No": 0.46796604458604674}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9984967446219845, "res": {"Yes": 0.9984967446219845, "No": 0.0015032765296571372}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996878239370904, "res": {"Yes": 0.9996878239370904, "No": 0.0003120590890829731}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985206357397115, "res": {"Yes": 0.9985206357397115, "No": 0.0014793452318660613}, "ground_truth": 1}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995021376446124, "res": {"Yes": 0.9995021376446124, "No": 0.0004978578318047618}, "ground_truth": 0}, {"key": "bc04e5a298854589044eb5a162f281f3e7e12c16", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969401107329683, "res": {"Yes": 0.9969401107329683, "No": 0.00305991237208932}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.99836804708571, "res": {"Yes": 0.99836804708571, "No": 0.0016318858006181853}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975915779135826, "res": {"Yes": 0.9975915779135826, "No": 0.002408378932309643}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9895215158620707, "res": {"Yes": 0.9895215158620707, "No": 0.010478325430736816}, "ground_truth": 1}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994891998436327, "res": {"Yes": 0.994891998436327, "No": 0.0051080502671142355}, "ground_truth": 0}, {"key": "8c3b7358950d0684a90736fc39d08340d3ec41ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9489378144207476, "res": {"Yes": 0.9489378144207476, "No": 0.05106202033960819}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9975122196674199, "res": {"Yes": 0.9975122196674199, "No": 0.002487722862450027}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952259213140071, "res": {"Yes": 0.9952259213140071, "No": 0.004774121027520771}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955787356932376, "res": {"Yes": 0.9955787356932376, "No": 0.00442127129917817}, "ground_truth": 1}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988965912036609, "res": {"Yes": 0.9988965912036609, "No": 0.0011033966424978743}, "ground_truth": 0}, {"key": "d1a86381ca59e3471d2863688229922f83218150", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987460331742782, "res": {"Yes": 0.9987460331742782, "No": 0.0012539207958127687}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9948577777248051, "res": {"Yes": 0.9948577777248051, "No": 0.005142244611469909}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997353615029462, "res": {"Yes": 0.9997353615029462, "No": 0.0002645828825962928}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994480732676254, "res": {"Yes": 0.9994480732676254, "No": 0.0005519172678517688}, "ground_truth": 1}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999199431347219, "res": {"Yes": 0.9999199431347219, "No": 8.002599808669463e-05}, "ground_truth": 0}, {"key": "313d98f4c6047ff556795b6da47e42e9f650946a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997749127559357, "res": {"Yes": 0.9997749127559357, "No": 0.00022499913504762966}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9977136686461618, "res": {"Yes": 0.9977136686461618, "No": 0.0022863491299691292}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9749782343276321, "res": {"Yes": 0.9749782343276321, "No": 0.025021740800619648}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960331414529199, "res": {"Yes": 0.9960331414529199, "No": 0.0039668260010799735}, "ground_truth": 1}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9847972968379025, "res": {"Yes": 0.9847972968379025, "No": 0.0152026944559652}, "ground_truth": 0}, {"key": "8c0934c689bbc9234dc51eff9d8d156aec106ca4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.992554332347237, "res": {"Yes": 0.992554332347237, "No": 0.007445545834147778}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.99811871004853, "res": {"Yes": 0.99811871004853, "No": 0.0018812402729601067}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9786315976713057, "res": {"Yes": 0.9786315976713057, "No": 0.021368327000734304}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980745276049406, "res": {"Yes": 0.9980745276049406, "No": 0.0019253923766256258}, "ground_truth": 1}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997464409015432, "res": {"Yes": 0.9997464409015432, "No": 0.00025343453825912897}, "ground_truth": 0}, {"key": "70f210b63211c32ab22354506b7bd9b80ee194af", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998312776452177, "res": {"Yes": 0.9998312776452177, "No": 0.000168621837204117}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9748358052868268, "res": {"Yes": 0.9748358052868268, "No": 0.02516414537975179}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9374927729693291, "res": {"Yes": 0.9374927729693291, "No": 0.06250718305272467}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9751731831635837, "res": {"Yes": 0.9751731831635837, "No": 0.0248267717999137}, "ground_truth": 1}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991120417684421, "res": {"Yes": 0.9991120417684421, "No": 0.000887959876601724}, "ground_truth": 0}, {"key": "dea4bf4a53b568dea8b3d5009a24d090485be8c9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957643910020133, "res": {"Yes": 0.9957643910020133, "No": 0.0042356562678121604}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9964861776735499, "res": {"Yes": 0.9964861776735499, "No": 0.0035138674868079563}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981273735569083, "res": {"Yes": 0.9981273735569083, "No": 0.001872578351856964}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950751621771544, "res": {"Yes": 0.9950751621771544, "No": 0.004924865078311709}, "ground_truth": 1}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995175736321823, "res": {"Yes": 0.995175736321823, "No": 0.004824298226762216}, "ground_truth": 0}, {"key": "3ffb4210cf832dc039ffe7522c47c6eb7a5e5ab5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976402251565873, "res": {"Yes": 0.9976402251565873, "No": 0.00235981276489702}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9989124060594639, "res": {"Yes": 0.9989124060594639, "No": 0.0010875802550252337}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999773720984959, "res": {"Yes": 0.999773720984959, "No": 0.0002261805463432205}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995507204063162, "res": {"Yes": 0.9995507204063162, "No": 0.00044923137102230004}, "ground_truth": 1}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993284201507631, "res": {"Yes": 0.9993284201507631, "No": 0.000671487269824329}, "ground_truth": 0}, {"key": "c010d72bf64ac4e1c758b7f078aeed89bc57a9db", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995411957828588, "res": {"Yes": 0.9995411957828588, "No": 0.00045876232154615194}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9956204473337176, "res": {"Yes": 0.9956204473337176, "No": 0.004379507516810023}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.993946800382302, "res": {"Yes": 0.993946800382302, "No": 0.006053128700693588}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986165562418314, "res": {"Yes": 0.9986165562418314, "No": 0.001383354406374409}, "ground_truth": 1}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996891347886059, "res": {"Yes": 0.9996891347886059, "No": 0.0003108309073241977}, "ground_truth": 0}, {"key": "343644770a597a2dfa7548ba165fa9c6bdc88245", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995968178997561, "res": {"Yes": 0.9995968178997561, "No": 0.0004030588406902379}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9990715786000898, "res": {"Yes": 0.9990715786000898, "No": 0.0009283697193946354}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996935439372551, "res": {"Yes": 0.9996935439372551, "No": 0.0003063755257754896}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968134747825872, "res": {"Yes": 0.9968134747825872, "No": 0.00318650268688956}, "ground_truth": 1}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971077847364714, "res": {"Yes": 0.9971077847364714, "No": 0.0028922428649879235}, "ground_truth": 0}, {"key": "4b89e8a6a25de61d21a1fed20fb911cd4b1e4b47", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968475834324815, "res": {"Yes": 0.9968475834324815, "No": 0.0031523817523374316}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9990125762240092, "res": {"Yes": 0.9990125762240092, "No": 0.0009873909476960487}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955491913370585, "res": {"Yes": 0.9955491913370585, "No": 0.0044507707302144294}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9884715280232437, "res": {"Yes": 0.9884715280232437, "No": 0.011528346079039183}, "ground_truth": 1}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99871416369684, "res": {"Yes": 0.99871416369684, "No": 0.0012858259557508227}, "ground_truth": 0}, {"key": "9b382d27692cb18aa61af350038b175b3ce75e2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981519626278165, "res": {"Yes": 0.9981519626278165, "No": 0.0018480356773937564}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9844264319528321, "res": {"Yes": 0.9844264319528321, "No": 0.015573505094602678}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994093725491017, "res": {"Yes": 0.9994093725491017, "No": 0.0005906022966437846}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998605935679798, "res": {"Yes": 0.9998605935679798, "No": 0.0001393647445724647}, "ground_truth": 1}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997189195305191, "res": {"Yes": 0.9997189195305191, "No": 0.0002809696223472931}, "ground_truth": 0}, {"key": "59be27355e5b86754678f70254cfbb1e6dc8bf0c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991077578849247, "res": {"Yes": 0.9991077578849247, "No": 0.0008922135930115385}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9984804603712155, "res": {"Yes": 0.9984804603712155, "No": 0.0015194816834608826}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9858006394993729, "res": {"Yes": 0.9858006394993729, "No": 0.01419930300123034}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996595892129042, "res": {"Yes": 0.9996595892129042, "No": 0.0003402852232892889}, "ground_truth": 1}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973285138871079, "res": {"Yes": 0.9973285138871079, "No": 0.002671430185444517}, "ground_truth": 0}, {"key": "ff10934c1dc127286575fb1a6b192da6da21f235", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992631865804186, "res": {"Yes": 0.9992631865804186, "No": 0.000736809263861714}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9899684361881459, "res": {"Yes": 0.9899684361881459, "No": 0.01003143416191682}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974213675990116, "res": {"Yes": 0.9974213675990116, "No": 0.0025786661754287885}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976531584487703, "res": {"Yes": 0.9976531584487703, "No": 0.0023468271782429917}, "ground_truth": 1}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947250613084954, "res": {"Yes": 0.9947250613084954, "No": 0.0052749029388885205}, "ground_truth": 0}, {"key": "1997690ceabc952c3396cb2a30e9050241f6f6c7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976875591204082, "res": {"Yes": 0.9976875591204082, "No": 0.002312462511389789}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5885850751665992, "res": {"Yes": 0.5885850751665992, "No": 0.41141460568467575}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.948911197088363, "res": {"Yes": 0.948911197088363, "No": 0.05108864591016322}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9339150703411853, "res": {"Yes": 0.9339150703411853, "No": 0.06608464887628468}, "ground_truth": 1}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9543468301376904, "res": {"Yes": 0.9543468301376904, "No": 0.04565312128581692}, "ground_truth": 0}, {"key": "c676027eea6109621206e3864c4eea30912fe09f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9847126778266481, "res": {"Yes": 0.9847126778266481, "No": 0.0152872825822448}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9928472124277128, "res": {"Yes": 0.9928472124277128, "No": 0.007152759876789692}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975842197052409, "res": {"Yes": 0.9975842197052409, "No": 0.0024157414991956977}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984130829865053, "res": {"Yes": 0.9984130829865053, "No": 0.0015869226157006356}, "ground_truth": 1}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998086997825606, "res": {"Yes": 0.998086997825606, "No": 0.0019130288645642147}, "ground_truth": 0}, {"key": "78e22dd79495204cf719e3728927241bb60b80e1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976896962692177, "res": {"Yes": 0.9976896962692177, "No": 0.0023103244725621586}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9824641573233491, "res": {"Yes": 0.9824641573233491, "No": 0.017535822329945168}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995949114105473, "res": {"Yes": 0.9995949114105473, "No": 0.0004050586850487435}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982212078293926, "res": {"Yes": 0.9982212078293926, "No": 0.0017787179980143582}, "ground_truth": 1}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995234463877612, "res": {"Yes": 0.9995234463877612, "No": 0.0004765321337990202}, "ground_truth": 0}, {"key": "6c1a1cf59028ba2567b0a1941e510c40133da1c0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994704604555806, "res": {"Yes": 0.9994704604555806, "No": 0.0005294347129278914}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998085175151116, "res": {"Yes": 0.9998085175151116, "No": 0.00019138508649119913}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9820993152981976, "res": {"Yes": 0.9820993152981976, "No": 0.017900636645145594}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991955786504705, "res": {"Yes": 0.9991955786504705, "No": 0.0008043154620554554}, "ground_truth": 1}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986475857375388, "res": {"Yes": 0.9986475857375388, "No": 0.0013523567064883254}, "ground_truth": 0}, {"key": "e8bbd5a77edf96611a4b8efcfbc625ec65985e98", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.994580943349677, "res": {"Yes": 0.994580943349677, "No": 0.005418982021945452}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.11296841642512467, "res": {"No": 0.8870314550612798, "Yes": 0.11296841642512467}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975240848465854, "res": {"Yes": 0.9975240848465854, "No": 0.002475937170216817}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981937688024932, "res": {"Yes": 0.9981937688024932, "No": 0.0018062182213040613}, "ground_truth": 1}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995395277199169, "res": {"Yes": 0.9995395277199169, "No": 0.0004603795500002445}, "ground_truth": 0}, {"key": "4141e012912dbf29c5e792b8654a9a36094b468c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994559327582866, "res": {"Yes": 0.9994559327582866, "No": 0.0005439809663759171}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8358768294047093, "res": {"Yes": 0.8358768294047093, "No": 0.16412305453273035}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994656984899161, "res": {"Yes": 0.9994656984899161, "No": 0.0005341935134028529}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996722169326118, "res": {"Yes": 0.9996722169326118, "No": 0.0003277403103195192}, "ground_truth": 1}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999192279722359, "res": {"Yes": 0.9999192279722359, "No": 8.067450143215813e-05}, "ground_truth": 0}, {"key": "d80fa94dc14601e4b82321e2cf749d6915251a70", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996750768989482, "res": {"Yes": 0.9996750768989482, "No": 0.00032480290686801657}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.950086635768331, "res": {"Yes": 0.950086635768331, "No": 0.04991327897014151}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.996889398975389, "res": {"Yes": 0.996889398975389, "No": 0.003110537618643094}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.991018106381687, "res": {"Yes": 0.991018106381687, "No": 0.008981826687843714}, "ground_truth": 1}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968203448447296, "res": {"Yes": 0.9968203448447296, "No": 0.0031796997655383274}, "ground_truth": 0}, {"key": "11e57b8ed79c4f4b110d81a5cd7152d42411c7a6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.997223350043176, "res": {"Yes": 0.997223350043176, "No": 0.0027766864129227303}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9638674151326576, "res": {"Yes": 0.9638674151326576, "No": 0.03613253402386946}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991660623194697, "res": {"Yes": 0.9991660623194697, "No": 0.0008338336799613606}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999485501335897, "res": {"Yes": 0.9999485501335897, "No": 5.140279549087852e-05}, "ground_truth": 1}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999531988512079, "res": {"Yes": 0.9999531988512079, "No": 4.67799437147234e-05}, "ground_truth": 0}, {"key": "b55c390e81c5d1d4fa077338d9daaed8978f710b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999818067994983, "res": {"Yes": 0.9999818067994983, "No": 1.8080509917434967e-05}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9931703547613957, "res": {"Yes": 0.9931703547613957, "No": 0.006829618933508986}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975697448632282, "res": {"Yes": 0.9975697448632282, "No": 0.0024302625067380698}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994017517219044, "res": {"Yes": 0.9994017517219044, "No": 0.0005982074247659496}, "ground_truth": 1}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999186320055549, "res": {"Yes": 0.9999186320055549, "No": 8.126663656429714e-05}, "ground_truth": 0}, {"key": "d46e1849cbbf875ccd018fd778fa6c433cab4694", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983935904592808, "res": {"Yes": 0.9983935904592808, "No": 0.0016063202096308416}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.965344772740738, "res": {"Yes": 0.965344772740738, "No": 0.034655189341997655}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962286705932522, "res": {"Yes": 0.9962286705932522, "No": 0.0037713716907387617}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996525368451889, "res": {"Yes": 0.996525368451889, "No": 0.0034745892791537888}, "ground_truth": 1}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981739271036011, "res": {"Yes": 0.9981739271036011, "No": 0.0018259881252125033}, "ground_truth": 0}, {"key": "09b582acc63514f671628e1766ff7e99b258fe9a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986507961949579, "res": {"Yes": 0.9986507961949579, "No": 0.0013491817249086005}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998864501472726, "res": {"Yes": 0.9998864501472726, "No": 0.00011352970922595488}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999768477225535, "res": {"Yes": 0.999768477225535, "No": 0.0002313918417213855}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.909465085904218e-06}, "ground_truth": 1}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999897933310884, "res": {"Yes": 0.9999897933310884, "No": 1.0115739305506895e-05}, "ground_truth": 0}, {"key": "c4afe4db222c094c40bb3f271e37e006bcbebf88", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 6.1717940034536e-06}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9962860524269901, "res": {"Yes": 0.9962860524269901, "No": 0.003713937664938844}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994096107483599, "res": {"Yes": 0.9994096107483599, "No": 0.0005903659258572074}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991878440473199, "res": {"Yes": 0.9991878440473199, "No": 0.0008121593444883092}, "ground_truth": 1}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992954417877803, "res": {"Yes": 0.9992954417877803, "No": 0.0007044576091331058}, "ground_truth": 0}, {"key": "e21871892c63ec9638bf1bd3c3a1c3ebdae88796", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999468813708443, "res": {"Yes": 0.9999468813708443, "No": 5.309652895139279e-05}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9902727547013632, "res": {"Yes": 0.9902727547013632, "No": 0.009727162978172804}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972311735902131, "res": {"Yes": 0.9972311735902131, "No": 0.0027688486361798856}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993715144798129, "res": {"Yes": 0.9993715144798129, "No": 0.0006284079503049282}, "ground_truth": 1}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979286188769912, "res": {"Yes": 0.9979286188769912, "No": 0.00207135437130875}, "ground_truth": 0}, {"key": "065378e265a6d5ff38b2e6748ba66490cf8670a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997987447445565, "res": {"Yes": 0.9997987447445565, "No": 0.00020117165688796372}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.997315120154302, "res": {"Yes": 0.997315120154302, "No": 0.0026848285709650576}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998211470960879, "res": {"Yes": 0.9998211470960879, "No": 0.00017883696092589702}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998905026252752, "res": {"Yes": 0.9998905026252752, "No": 0.00010946321513353857}, "ground_truth": 1}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999564172037576, "res": {"Yes": 0.9999564172037576, "No": 4.3538672144202217e-05}, "ground_truth": 0}, {"key": "5852e143b34cc30e94aed29193fc2e1dce37b1b1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9966366531652169, "res": {"Yes": 0.9966366531652169, "No": 0.003363365060763953}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.999548218284107, "res": {"Yes": 0.999548218284107, "No": 0.0004516752995980615}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997818250336202, "res": {"Yes": 0.9997818250336202, "No": 0.00021807969251091733}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999506956924973, "res": {"Yes": 0.9999506956924973, "No": 4.9202825856877785e-05}, "ground_truth": 1}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999629731405111, "res": {"Yes": 0.9999629731405111, "No": 3.690893113592127e-05}, "ground_truth": 0}, {"key": "af76b4c33b65666487dcc07f75d37de03ff61207", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992761519345011, "res": {"Yes": 0.9992761519345011, "No": 0.0007237535674283208}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9986656509364357, "res": {"Yes": 0.9986656509364357, "No": 0.0013342923534366846}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998354490699897, "res": {"Yes": 0.9998354490699897, "No": 0.00016451973917053626}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99910501863494, "res": {"Yes": 0.99910501863494, "No": 0.0008949622673174458}, "ground_truth": 1}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972334287313389, "res": {"Yes": 0.9972334287313389, "No": 0.0027665118838751564}, "ground_truth": 0}, {"key": "9918924dbaed44429c58ea3638c3847317b71127", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977067854433055, "res": {"Yes": 0.9977067854433055, "No": 0.0022931634396434245}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9790314787352148, "res": {"Yes": 0.9790314787352148, "No": 0.02096854544686123}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9867188896830458, "res": {"Yes": 0.9867188896830458, "No": 0.013280984548415782}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9902530019277301, "res": {"Yes": 0.9902530019277301, "No": 0.009746914554537517}, "ground_truth": 1}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894830093815256, "res": {"Yes": 0.9894830093815256, "No": 0.010516918845296245}, "ground_truth": 0}, {"key": "c87e99f422b46c03ce77e7cd064bff87c076e1ed", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9864574704547379, "res": {"Yes": 0.9864574704547379, "No": 0.013542437575072216}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9449215507629471, "res": {"Yes": 0.9449215507629471, "No": 0.05507829833638878}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9927809345429452, "res": {"Yes": 0.9927809345429452, "No": 0.007218998843697583}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9728138827043423, "res": {"Yes": 0.9728138827043423, "No": 0.02718597401207446}, "ground_truth": 1}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968405874810053, "res": {"Yes": 0.9968405874810053, "No": 0.003159367909597793}, "ground_truth": 0}, {"key": "aaca402152fbea1b16e14999374f6394520289a8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956714925095507, "res": {"Yes": 0.9956714925095507, "No": 0.00432852604599797}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8310085960137374, "res": {"Yes": 0.8310085960137374, "No": 0.16899137514607238}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9941699219496324, "res": {"Yes": 0.9941699219496324, "No": 0.005830011535364474}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996273630012906, "res": {"Yes": 0.996273630012906, "No": 0.0037264225777081522}, "ground_truth": 1}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983084014683522, "res": {"Yes": 0.9983084014683522, "No": 0.001691517878815636}, "ground_truth": 0}, {"key": "259d8cccfcb9b9edc00d757ec6efecde6fc06110", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961081277420053, "res": {"Yes": 0.9961081277420053, "No": 0.003891848661192858}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9996167097544815, "res": {"Yes": 0.9996167097544815, "No": 0.0003832396678006608}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999919585553415, "res": {"Yes": 0.999919585553415, "No": 8.032698027361069e-05}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999561788061766, "res": {"Yes": 0.9999561788061766, "No": 4.370377828452635e-05}, "ground_truth": 1}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999275715930637, "res": {"Yes": 0.9999275715930637, "No": 7.240486968441868e-05}, "ground_truth": 0}, {"key": "d076dbce6e84b20153db61c809d0f01f46ef1f45", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999244725263433, "res": {"Yes": 0.9999244725263433, "No": 7.545681893213714e-05}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9986842000248985, "res": {"Yes": 0.9986842000248985, "No": 0.001315807149171847}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9764929542511852, "res": {"Yes": 0.9764929542511852, "No": 0.023507000701607703}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997950501750854, "res": {"Yes": 0.9997950501750854, "No": 0.00020489620994226207}, "ground_truth": 1}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995678744931329, "res": {"Yes": 0.9995678744931329, "No": 0.0004320488056758404}, "ground_truth": 0}, {"key": "c74e983d24450ed9c4ba3a97e345bba707d26853", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995750163111616, "res": {"Yes": 0.9995750163111616, "No": 0.00042486703364633886}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9944560732966173, "res": {"Yes": 0.9944560732966173, "No": 0.0055439557429487534}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976746328648244, "res": {"Yes": 0.9976746328648244, "No": 0.002325356310166827}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984764246211988, "res": {"Yes": 0.9984764246211988, "No": 0.0015235730681090623}, "ground_truth": 1}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9922771337637294, "res": {"Yes": 0.9922771337637294, "No": 0.007722843915074952}, "ground_truth": 0}, {"key": "0922357b2d0439cf4ba6ec9dc143e11a38b49d8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9770813434618246, "res": {"Yes": 0.9770813434618246, "No": 0.02291861079525017}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9922199792546417, "res": {"Yes": 0.9922199792546417, "No": 0.0077799972053609915}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999549648098874, "res": {"Yes": 0.999549648098874, "No": 0.0004502565619499874}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990875225609593, "res": {"Yes": 0.9990875225609593, "No": 0.0009124726003245843}, "ground_truth": 1}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999315439518515, "res": {"Yes": 0.999315439518515, "No": 0.0006845369046046991}, "ground_truth": 0}, {"key": "c62ea6aaf31a7f5b6a7c2eac7780c86f9baed786", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999901944987707, "res": {"Yes": 0.999901944987707, "No": 9.796326465763329e-05}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.848210875640628, "res": {"Yes": 0.848210875640628, "No": 0.151788936311527}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9687470562779722, "res": {"Yes": 0.9687470562779722, "No": 0.031252903347353915}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9444457986275889, "res": {"Yes": 0.9444457986275889, "No": 0.055553908796415974}, "ground_truth": 1}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979525938012931, "res": {"Yes": 0.9979525938012931, "No": 0.002047428425231414}, "ground_truth": 0}, {"key": "6bb04883f35820a450ff821eda46a4002c0fc7e9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9901897644097797, "res": {"Yes": 0.9901897644097797, "No": 0.009810150147170758}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9965221649269362, "res": {"Yes": 0.9965221649269362, "No": 0.003477881273714548}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979397716897642, "res": {"Yes": 0.9979397716897642, "No": 0.0020601798379816043}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9921778465200112, "res": {"Yes": 0.9921778465200112, "No": 0.007822071983450248}, "ground_truth": 1}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985684055104425, "res": {"Yes": 0.9985684055104425, "No": 0.0014315986816006594}, "ground_truth": 0}, {"key": "71f87a7cbda73f1c0656d811b0026437e4ee4aa0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9841453238451584, "res": {"Yes": 0.9841453238451584, "No": 0.015854641221134825}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9690394576028717, "res": {"Yes": 0.9690394576028717, "No": 0.03096040961157814}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.99812571886272, "res": {"Yes": 0.99812571886272, "No": 0.0018742981680588287}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991066860426969, "res": {"Yes": 0.9991066860426969, "No": 0.0008932482680441983}, "ground_truth": 1}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996078921243943, "res": {"Yes": 0.9996078921243943, "No": 0.00039200890562529357}, "ground_truth": 0}, {"key": "fa5089d759a9fb810d28c140cf60e3a5fffff8ba", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990976420699977, "res": {"Yes": 0.9990976420699977, "No": 0.0009023708323951524}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9984498005024577, "res": {"Yes": 0.9984498005024577, "No": 0.0015502067032604537}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.995654834268297, "res": {"Yes": 0.995654834268297, "No": 0.004345129570982388}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996522011260557, "res": {"Yes": 0.9996522011260557, "No": 0.00034771627933016195}, "ground_truth": 1}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964349242268928, "res": {"Yes": 0.9964349242268928, "No": 0.0035650721032672505}, "ground_truth": 0}, {"key": "1f9aed77465301d7f3787a29555754a03728c4b7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9883077943575143, "res": {"Yes": 0.9883077943575143, "No": 0.011692058831171859}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9958805968149098, "res": {"Yes": 0.9958805968149098, "No": 0.00411935657060541}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974069027002582, "res": {"Yes": 0.9974069027002582, "No": 0.002593030526079289}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985617543680145, "res": {"Yes": 0.9985617543680145, "No": 0.0014382396190666324}, "ground_truth": 1}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992321197710343, "res": {"Yes": 0.9992321197710343, "No": 0.0007678043575826252}, "ground_truth": 0}, {"key": "7237201bcb7291f2ededb818f901a451356db34b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972165925814844, "res": {"Yes": 0.9972165925814844, "No": 0.00278333814512932}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9975760328644185, "res": {"Yes": 0.9975760328644185, "No": 0.0024239447595482137}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989056261642988, "res": {"Yes": 0.9989056261642988, "No": 0.0010943731996716061}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991097789320968, "res": {"Yes": 0.9991097789320968, "No": 0.0008901723164411822}, "ground_truth": 1}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9686281469689594, "res": {"Yes": 0.9686281469689594, "No": 0.031371733453066576}, "ground_truth": 0}, {"key": "38d13d78fedc889784ffb3cb213a758c758be145", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983111311467462, "res": {"Yes": 0.9983111311467462, "No": 0.0016888467152693625}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9959398361439351, "res": {"Yes": 0.9959398361439351, "No": 0.004060160800980362}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997728867541188, "res": {"Yes": 0.9997728867541188, "No": 0.00022706369162323333}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976601530195837, "res": {"Yes": 0.9976601530195837, "No": 0.0023397808683543546}, "ground_truth": 1}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992247385405741, "res": {"Yes": 0.9992247385405741, "No": 0.0007752419374293013}, "ground_truth": 0}, {"key": "98fd4b91ec12241a96493471a3e21c9ab96ec523", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997825401078307, "res": {"Yes": 0.9997825401078307, "No": 0.00021737969215301654}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9987453188713707, "res": {"Yes": 0.9987453188713707, "No": 0.0012547043648562207}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987990632024664, "res": {"Yes": 0.9987990632024664, "No": 0.0012009072093817717}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997220179541707, "res": {"Yes": 0.9997220179541707, "No": 0.0002778739400978274}, "ground_truth": 1}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999751565364996, "res": {"Yes": 0.999751565364996, "No": 0.00024831387177874593}, "ground_truth": 0}, {"key": "8cefdd23a829375abeb18b9bd44bb053a67dfdf8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997907597337986, "res": {"Yes": 0.9997907597337986, "No": 0.0002091400363472451}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9961717561807765, "res": {"Yes": 0.9961717561807765, "No": 0.0038282787091840336}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989900912014915, "res": {"Yes": 0.9989900912014915, "No": 0.0010098201836190553}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992289074447792, "res": {"Yes": 0.9992289074447792, "No": 0.0007710039971834896}, "ground_truth": 1}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995924091877131, "res": {"Yes": 0.9995924091877131, "No": 0.0004075267431804726}, "ground_truth": 0}, {"key": "3de916a84cae6c1dee0076f4e5d6e26fc95c3242", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975610935768866, "res": {"Yes": 0.9975610935768866, "No": 0.0024389442932624154}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.991674066184202, "res": {"Yes": 0.991674066184202, "No": 0.008325913661570733}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955560487035052, "res": {"Yes": 0.9955560487035052, "No": 0.004443930706158166}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9730858542981631, "res": {"Yes": 0.9730858542981631, "No": 0.026913999506472264}, "ground_truth": 1}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995586998615626, "res": {"Yes": 0.9995586998615626, "No": 0.00044126233298487474}, "ground_truth": 0}, {"key": "24611abacb038c28a84c76b10165e474ac0fdd7e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.990240023756933, "res": {"Yes": 0.990240023756933, "No": 0.009759954894821876}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9972546523781824, "res": {"Yes": 0.9972546523781824, "No": 0.0027453868087874813}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998559452768782, "res": {"Yes": 0.9998559452768782, "No": 0.00014397728699935112}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995690660187321, "res": {"Yes": 0.9995690660187321, "No": 0.00043091345003918447}, "ground_truth": 1}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999796611748367, "res": {"Yes": 0.9999796611748367, "No": 2.0227712253619587e-05}, "ground_truth": 0}, {"key": "4f3178963b84a0e3625f7486b045872c665073d7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999520068687072, "res": {"Yes": 0.9999520068687072, "No": 4.7915380933840276e-05}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.998744247318093, "res": {"Yes": 0.998744247318093, "No": 0.001255770940225413}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996835339659427, "res": {"Yes": 0.9996835339659427, "No": 0.00031637858742778054}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993184175329278, "res": {"Yes": 0.9993184175329278, "No": 0.000681511113193679}, "ground_truth": 1}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996908031112136, "res": {"Yes": 0.9996908031112136, "No": 0.00030910269190990113}, "ground_truth": 0}, {"key": "6ff0470115b556c80229f4305c23afa683c9f8da", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.997096994194123, "res": {"Yes": 0.997096994194123, "No": 0.0029030211743840094}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9976053394850405, "res": {"Yes": 0.9976053394850405, "No": 0.0023946157485733805}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995421489757839, "res": {"Yes": 0.9995421489757839, "No": 0.0004578046468976749}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999168440936441, "res": {"Yes": 0.9999168440936441, "No": 8.307506178486238e-05}, "ground_truth": 1}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998951510670336, "res": {"Yes": 0.9998951510670336, "No": 0.0001047667088697122}, "ground_truth": 0}, {"key": "4be9a7e5e9115eb681541c4a8d247ac84f0eb774", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999841908319662, "res": {"Yes": 0.9999841908319662, "No": 1.5684148583614014e-05}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.996705219241913, "res": {"Yes": 0.996705219241913, "No": 0.003294702840736607}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962801324132267, "res": {"Yes": 0.9962801324132267, "No": 0.0037198557065916842}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986327200800869, "res": {"Yes": 0.9986327200800869, "No": 0.0013672183563731347}, "ground_truth": 1}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980925787444661, "res": {"Yes": 0.9980925787444661, "No": 0.0019073412926945287}, "ground_truth": 0}, {"key": "de3ba4e29501a10effcbb5a61e7b9c0ab3721669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8616113155876924, "res": {"Yes": 0.8616113155876924, "No": 0.13838851823081627}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9903420791914433, "res": {"Yes": 0.9903420791914433, "No": 0.009657881125535926}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956447825860912, "res": {"Yes": 0.9956447825860912, "No": 0.004355255509586387}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9898982236818498, "res": {"Yes": 0.9898982236818498, "No": 0.010101721743598578}, "ground_truth": 1}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9931489541571622, "res": {"Yes": 0.9931489541571622, "No": 0.006850972952040775}, "ground_truth": 0}, {"key": "d23090b75384c925e7f20b1ec5218394b52eae38", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9940880393232338, "res": {"Yes": 0.9940880393232338, "No": 0.005911898286902802}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9949737792334243, "res": {"Yes": 0.9949737792334243, "No": 0.0050262228570807505}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996894922775044, "res": {"Yes": 0.9996894922775044, "No": 0.0003104480442187751}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995793058166245, "res": {"Yes": 0.9995793058166245, "No": 0.0004206693183604931}, "ground_truth": 1}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998241266575298, "res": {"Yes": 0.9998241266575298, "No": 0.00017582471442002603}, "ground_truth": 0}, {"key": "98008fdd5ea99bc7c437d6e1aa86c750bf667415", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999202959705496, "res": {"Yes": 0.999202959705496, "No": 0.0007969855578325772}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9980983977409722, "res": {"Yes": 0.9980983977409722, "No": 0.0019015622382467599}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993309144376394, "res": {"Yes": 0.9993309144376394, "No": 0.0006690675040577637}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998379519412053, "res": {"Yes": 0.9998379519412053, "No": 0.0001620134826435896}, "ground_truth": 1}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995664446722851, "res": {"Yes": 0.9995664446722851, "No": 0.00043354559001300484}, "ground_truth": 0}, {"key": "fd13c81373473f862d0f3559ffbc2a03fa95f492", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997052187165428, "res": {"Yes": 0.9997052187165428, "No": 0.00029476091141862864}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9847540661702671, "res": {"Yes": 0.9847540661702671, "No": 0.015245931475727165}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995721639879392, "res": {"Yes": 0.9995721639879392, "No": 0.00042778663629970486}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997257085847705, "res": {"Yes": 0.9997257085847705, "No": 0.00027422928361083037}, "ground_truth": 1}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999853828508316, "res": {"Yes": 0.9999853828508316, "No": 1.4532863860147707e-05}, "ground_truth": 0}, {"key": "ad275932d9fb03762cc781e06955fb088e165ad9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979688619583021, "res": {"Yes": 0.9979688619583021, "No": 0.0020311482610668743}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9475760418454292, "res": {"Yes": 0.9475760418454292, "No": 0.05242380483865207}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.983389328205332, "res": {"Yes": 0.983389328205332, "No": 0.016610612178759625}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969513675463925, "res": {"Yes": 0.9969513675463925, "No": 0.0030486573451215682}, "ground_truth": 1}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986222629523095, "res": {"Yes": 0.9986222629523095, "No": 0.0013776804876542105}, "ground_truth": 0}, {"key": "f9fedde4734d8ac3431c0ed5d338efcd799b1aea", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945508716549, "res": {"Yes": 0.9945508716549, "No": 0.005449080500748657}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9750483806849971, "res": {"Yes": 0.9750483806849971, "No": 0.02495148099572914}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9939592901974783, "res": {"Yes": 0.9939592901974783, "No": 0.006040678130959064}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9925760652570595, "res": {"Yes": 0.9925760652570595, "No": 0.007423955333625282}, "ground_truth": 1}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.992193220434852, "res": {"Yes": 0.992193220434852, "No": 0.007806754175836273}, "ground_truth": 0}, {"key": "bbe84a85e38aac4e2cff76da7ce4ed349474db25", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998969508221699, "res": {"Yes": 0.998969508221699, "No": 0.0010304350368893255}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9994984442211723, "res": {"Yes": 0.9994984442211723, "No": 0.0005014832253875378}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998635732369773, "res": {"Yes": 0.9998635732369773, "No": 0.00013630193809916063}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997666895911075, "res": {"Yes": 0.9997666895911075, "No": 0.00023318929717899996}, "ground_truth": 1}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994478349592552, "res": {"Yes": 0.9994478349592552, "No": 0.0005521025775810844}, "ground_truth": 0}, {"key": "45884af9aff9abf54fec27f510c3f9ed000fc6e1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998585885104082, "res": {"Yes": 0.998585885104082, "No": 0.0014140434267388134}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9767815407893552, "res": {"Yes": 0.9767815407893552, "No": 0.02321837704170259}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979913019396359, "res": {"Yes": 0.9979913019396359, "No": 0.0020087018727596335}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980655073475437, "res": {"Yes": 0.9980655073475437, "No": 0.0019344523805930657}, "ground_truth": 1}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995168933536139, "res": {"Yes": 0.9995168933536139, "No": 0.00048305405280686653}, "ground_truth": 0}, {"key": "59e60ba3c7ea5f5991db8ed4afd368c69e697dde", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9966778619528393, "res": {"Yes": 0.9966778619528393, "No": 0.00332216201055223}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.987613604788481, "res": {"Yes": 0.987613604788481, "No": 0.012386303169221437}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988707822467415, "res": {"Yes": 0.9988707822467415, "No": 0.001129223557147433}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999826391131764, "res": {"Yes": 0.999826391131764, "No": 0.00017349652284801576}, "ground_truth": 1}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999278099798249, "res": {"Yes": 0.9999278099798249, "No": 7.216134812085966e-05}, "ground_truth": 0}, {"key": "265ad4cbd8cbad94527db327a39c2d8f33db33ce", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992435472547458, "res": {"Yes": 0.9992435472547458, "No": 0.0007563967635074547}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8844667004244872, "res": {"Yes": 0.8844667004244872, "No": 0.1155331935806864}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9938854563758359, "res": {"Yes": 0.9938854563758359, "No": 0.006114509656850564}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996367138244089, "res": {"Yes": 0.9996367138244089, "No": 0.0003632418834560978}, "ground_truth": 1}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994203291740881, "res": {"Yes": 0.9994203291740881, "No": 0.0005795876242334132}, "ground_truth": 0}, {"key": "54e68f9b7cdd209c09b2a47feef62b9082346031", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971689499019004, "res": {"Yes": 0.9971689499019004, "No": 0.002831004735849033}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9987601823098048, "res": {"Yes": 0.9987601823098048, "No": 0.001239750288359723}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996078921243943, "res": {"Yes": 0.9996078921243943, "No": 0.0003920508528223304}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994420009094303, "res": {"Yes": 0.9994420009094303, "No": 0.00055790012447464}, "ground_truth": 1}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998885955719342, "res": {"Yes": 0.9998885955719342, "No": 0.0001112996994550986}, "ground_truth": 0}, {"key": "3b5e3dff560e2233e8370c87e9d92461200f8bd4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998956278331537, "res": {"Yes": 0.9998956278331537, "No": 0.00010434800948441028}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.999199509263793, "res": {"Yes": 0.999199509263793, "No": 0.0008004379840871238}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994728432459723, "res": {"Yes": 0.9994728432459723, "No": 0.0005270561334551926}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996909222743645, "res": {"Yes": 0.9996909222743645, "No": 0.00030896239113845916}, "ground_truth": 1}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998023201312406, "res": {"Yes": 0.9998023201312406, "No": 0.00019765736620125704}, "ground_truth": 0}, {"key": "242c51d129c998a2585feee12e109a2058eb7c2b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999790402198689, "res": {"Yes": 0.999790402198689, "No": 0.00020953432280841665}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9969583501185283, "res": {"Yes": 0.9969583501185283, "No": 0.0030416839074348403}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934434727400581, "res": {"Yes": 0.9934434727400581, "No": 0.0065564931484138386}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998174524538312, "res": {"Yes": 0.9998174524538312, "No": 0.00018244514599724795}, "ground_truth": 1}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973639700446791, "res": {"Yes": 0.9973639700446791, "No": 0.0026360536756796656}, "ground_truth": 0}, {"key": "5ce92746bc1f4f5cbb25956c1289ffcd7f50f07e", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974399907269648, "res": {"Yes": 0.9974399907269648, "No": 0.002559964212706251}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9935275983133968, "res": {"Yes": 0.9935275983133968, "No": 0.006472377970826818}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982068318505434, "res": {"Yes": 0.9982068318505434, "No": 0.001793162783744937}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9925420042042035, "res": {"Yes": 0.9925420042042035, "No": 0.0074579328576100746}, "ground_truth": 1}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982326164649488, "res": {"Yes": 0.9982326164649488, "No": 0.001767334852857871}, "ground_truth": 0}, {"key": "2f32a9eebbfa707a264d82a3bef5557f6b3b5f20", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987281934323398, "res": {"Yes": 0.9987281934323398, "No": 0.0012717410650483225}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9996930672734899, "res": {"Yes": 0.9996930672734899, "No": 0.00030682899043256537}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998474830718894, "res": {"Yes": 0.9998474830718894, "No": 0.00015243550033618213}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983225408100328, "res": {"Yes": 0.9983225408100328, "No": 0.0016773859156257113}, "ground_truth": 1}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977307586299589, "res": {"Yes": 0.9977307586299589, "No": 0.002269270193141537}, "ground_truth": 0}, {"key": "57a373fa7d17600396fcee99164e79c0edccad14", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994408095352976, "res": {"Yes": 0.9994408095352976, "No": 0.0005591716867681787}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9783278883247946, "res": {"Yes": 0.9783278883247946, "No": 0.02167211863717288}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9929002044804173, "res": {"Yes": 0.9929002044804173, "No": 0.007099804716723208}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993121319178851, "res": {"Yes": 0.993121319178851, "No": 0.006878646335200453}, "ground_truth": 1}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9819357239460347, "res": {"Yes": 0.9819357239460347, "No": 0.018064330060743095}, "ground_truth": 0}, {"key": "9b087c5572cc091c519625d37d1a614b97b8ed30", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989386960817931, "res": {"Yes": 0.9989386960817931, "No": 0.001061259891255558}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9914426979354987, "res": {"Yes": 0.9914426979354987, "No": 0.008557236326606366}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996888964427507, "res": {"Yes": 0.9996888964427507, "No": 0.0003109886940446583}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998449838462458, "res": {"Yes": 0.9998449838462458, "No": 0.0001549751153597585}, "ground_truth": 1}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999825220097418, "res": {"Yes": 0.9999825220097418, "No": 1.7437020447142415e-05}, "ground_truth": 0}, {"key": "b5211115ff68e12e759a474a02d3b163211af563", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998654802485932, "res": {"Yes": 0.9998654802485932, "No": 0.00013445499830518783}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9938093867973011, "res": {"Yes": 0.9938093867973011, "No": 0.00619054841054018}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981191785655625, "res": {"Yes": 0.9981191785655625, "No": 0.0018807388011524981}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979238681493164, "res": {"Yes": 0.9979238681493164, "No": 0.002076155319006475}, "ground_truth": 1}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997767004150644, "res": {"Yes": 0.9997767004150644, "No": 0.00022317440655588935}, "ground_truth": 0}, {"key": "6b75abea2de5c9c9952af0427ccec78910ac811f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994430731413672, "res": {"Yes": 0.9994430731413672, "No": 0.0005569205318160764}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8247840866927523, "res": {"Yes": 0.8247840866927523, "No": 0.17521589144473895}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9802159984714167, "res": {"Yes": 0.9802159984714167, "No": 0.019784050424791883}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9951651105886608, "res": {"Yes": 0.9951651105886608, "No": 0.004834933036085435}, "ground_truth": 1}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9959672699833385, "res": {"Yes": 0.9959672699833385, "No": 0.0040327812684773385}, "ground_truth": 0}, {"key": "259cc1db3d4c814dafc02ed071327f65a7b9eaad", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972030779949129, "res": {"Yes": 0.9972030779949129, "No": 0.002796966057946141}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9959307315027575, "res": {"Yes": 0.9959307315027575, "No": 0.00406925639561541}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982113462511485, "res": {"Yes": 0.9982113462511485, "No": 0.0017886350355332914}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996260004552724, "res": {"Yes": 0.9996260004552724, "No": 0.0003739159568898585}, "ground_truth": 1}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995076146614673, "res": {"Yes": 0.9995076146614673, "No": 0.0004923680756181588}, "ground_truth": 0}, {"key": "3bb1cda1e01cb6e14f6190ced3b1ec3f7ef50de9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998223389235985, "res": {"Yes": 0.9998223389235985, "No": 0.0001775843234792556}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.999983237218497, "res": {"Yes": 0.999983237218497, "No": 1.6691415710688587e-05}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973480825621228, "res": {"Yes": 0.9973480825621228, "No": 0.002651916480973551}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996263579215942, "res": {"Yes": 0.9996263579215942, "No": 0.0003735524256706629}, "ground_truth": 1}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998829936457775, "res": {"Yes": 0.9998829936457775, "No": 0.0001169563230649678}, "ground_truth": 0}, {"key": "7afbe903f1f7cdc1af509583649babf481d01729", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997047420472112, "res": {"Yes": 0.9997047420472112, "No": 0.00029523438254039125}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9993466315381913, "res": {"Yes": 0.9993466315381913, "No": 0.0006533120966915998}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.99793122137843, "res": {"Yes": 0.99793122137843, "No": 0.00206880175837044}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976732054927482, "res": {"Yes": 0.9976732054927482, "No": 0.002326746358402829}, "ground_truth": 1}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982404554168182, "res": {"Yes": 0.9982404554168182, "No": 0.0017595256910035936}, "ground_truth": 0}, {"key": "79d625a3311f4ff11598a3ae4fca63103e84e9a1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992992463026356, "res": {"Yes": 0.9992992463026356, "No": 0.0007007455628085516}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9826668281591218, "res": {"Yes": 0.9826668281591218, "No": 0.017333122718472455}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961378112103706, "res": {"Yes": 0.9961378112103706, "No": 0.003862134766747923}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969253037899339, "res": {"Yes": 0.9969253037899339, "No": 0.0030747502516429433}, "ground_truth": 1}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958826077986294, "res": {"Yes": 0.9958826077986294, "No": 0.004117416398768359}, "ground_truth": 0}, {"key": "ea6c4267f17d55f1bd9f0950c31ef59565bdc30b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9936764787483299, "res": {"Yes": 0.9936764787483299, "No": 0.006323515344903488}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9961800426707607, "res": {"Yes": 0.9961800426707607, "No": 0.0038198944027687063}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999645650926468, "res": {"Yes": 0.999645650926468, "No": 0.00035430444510858026}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997258277620793, "res": {"Yes": 0.9997258277620793, "No": 0.0002741017966369699}, "ground_truth": 1}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997690731077205, "res": {"Yes": 0.9997690731077205, "No": 0.00023087364233082632}, "ground_truth": 0}, {"key": "851d02b689bec14607574096bb77b9acf53c15ee", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998200744485439, "res": {"Yes": 0.9998200744485439, "No": 0.00017989395852114674}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9994885626686809, "res": {"Yes": 0.9994885626686809, "No": 0.0005114079242592092}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999330437956773, "res": {"Yes": 0.999330437956773, "No": 0.0006694681277604041}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998229348378865, "res": {"Yes": 0.9998229348378865, "No": 0.00017694740790901272}, "ground_truth": 1}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998033927597371, "res": {"Yes": 0.9998033927597371, "No": 0.00019652301673560944}, "ground_truth": 0}, {"key": "f2d00c8c396698becc18fd2b41ba595c7a3e1cc6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982080219134376, "res": {"Yes": 0.9982080219134376, "No": 0.0017919551368190713}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9993273480318359, "res": {"Yes": 0.9993273480318359, "No": 0.0006726173254965812}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990495686462644, "res": {"Yes": 0.9990495686462644, "No": 0.0009503935461937539}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994993973733282, "res": {"Yes": 0.9994993973733282, "No": 0.0005005433748134905}, "ground_truth": 1}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998277021549788, "res": {"Yes": 0.9998277021549788, "No": 0.00017228043031779395}, "ground_truth": 0}, {"key": "70c6ca3234f9b1221c8aea7671dd78e1ca9d9139", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992485463827934, "res": {"Yes": 0.9992485463827934, "No": 0.0007514479595035452}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9542463083543653, "res": {"Yes": 0.9542463083543653, "No": 0.04575360923453219}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9760415390233227, "res": {"Yes": 0.9760415390233227, "No": 0.023958366121119148}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9954094386840479, "res": {"Yes": 0.9954094386840479, "No": 0.004590498294081092}, "ground_truth": 1}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9918582728649352, "res": {"Yes": 0.9918582728649352, "No": 0.008141696544369892}, "ground_truth": 0}, {"key": "86eaa90c419d5ed93341248fecd8c25d820f2b82", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9953484983984001, "res": {"Yes": 0.9953484983984001, "No": 0.00465145130256487}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9727205914080784, "res": {"Yes": 0.9727205914080784, "No": 0.02727924015365}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956425387039732, "res": {"Yes": 0.9956425387039732, "No": 0.00435749163609223}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9916506168406445, "res": {"Yes": 0.9916506168406445, "No": 0.00834931816783631}, "ground_truth": 1}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995573928294658, "res": {"Yes": 0.9995573928294658, "No": 0.0004425987298935484}, "ground_truth": 0}, {"key": "a1841cf61c53937fffc54c0e81d8f7cfcad2f0c2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986063301610697, "res": {"Yes": 0.9986063301610697, "No": 0.0013936899623840956}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.973830071635725, "res": {"Yes": 0.973830071635725, "No": 0.026169805359780782}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974309766032683, "res": {"Yes": 0.9974309766032683, "No": 0.002568960337141109}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944308478460205, "res": {"Yes": 0.9944308478460205, "No": 0.005569145766999849}, "ground_truth": 1}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995634659087445, "res": {"Yes": 0.9995634659087445, "No": 0.00043641641069649056}, "ground_truth": 0}, {"key": "5d221c86fdadca378eeaabc39c881cc74efcff07", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9742006825342038, "res": {"Yes": 0.9742006825342038, "No": 0.02579926179656892}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9992135531115887, "res": {"Yes": 0.9992135531115887, "No": 0.0007864054962720753}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999003954908585, "res": {"Yes": 0.9999003954908585, "No": 9.956209879320541e-05}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997658553661344, "res": {"Yes": 0.9997658553661344, "No": 0.00023406071744594357}, "ground_truth": 1}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998134039412105, "res": {"Yes": 0.9998134039412105, "No": 0.00018657784621341276}, "ground_truth": 0}, {"key": "7a49f5bc1d336309f57076bceba91885ee5368b5", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998186442669394, "res": {"Yes": 0.9998186442669394, "No": 0.00018131295257155068}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9924403132624212, "res": {"Yes": 0.9924403132624212, "No": 0.007559619656825503}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986234533107564, "res": {"Yes": 0.9986234533107564, "No": 0.001376499632999838}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9804457086156372, "res": {"Yes": 0.9804457086156372, "No": 0.019554336525254123}, "ground_truth": 1}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978528810646754, "res": {"Yes": 0.9978528810646754, "No": 0.0021471175731226817}, "ground_truth": 0}, {"key": "dd46893fd318dce4e209f9a335612659ebf8fd24", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9943648354414057, "res": {"Yes": 0.9943648354414057, "No": 0.005635150633961715}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.996247591147833, "res": {"Yes": 0.996247591147833, "No": 0.003752414423174421}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950011609901039, "res": {"Yes": 0.9950011609901039, "No": 0.00499878697990576}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990753896158485, "res": {"Yes": 0.9990753896158485, "No": 0.0009246128024794581}, "ground_truth": 1}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988316495534654, "res": {"Yes": 0.9988316495534654, "No": 0.0011682761415027774}, "ground_truth": 0}, {"key": "acda8749fba5e9553e9f47cef89ef85eae5ad4e3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982219181638569, "res": {"Yes": 0.9982219181638569, "No": 0.0017780276336959136}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9973741725318569, "res": {"Yes": 0.9973741725318569, "No": 0.002625772868967831}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.996764904534789, "res": {"Yes": 0.996764904534789, "No": 0.003235117597717084}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971335044663768, "res": {"Yes": 0.9971335044663768, "No": 0.002866461318608788}, "ground_truth": 1}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9845420822718348, "res": {"Yes": 0.9845420822718348, "No": 0.015457896590923986}, "ground_truth": 0}, {"key": "20b11518df6520f552d691d5f6c0d02755d11809", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995642999448483, "res": {"Yes": 0.9995642999448483, "No": 0.00043560020338406953}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9980453261679899, "res": {"Yes": 0.9980453261679899, "No": 0.001954626797866075}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.924416828468696, "res": {"Yes": 0.924416828468696, "No": 0.0755828933379545}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964539820242138, "res": {"Yes": 0.9964539820242138, "No": 0.003546070109829689}, "ground_truth": 1}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9940653006146706, "res": {"Yes": 0.9940653006146706, "No": 0.005934629376285876}, "ground_truth": 0}, {"key": "51fe083191bb5568bf6a5fea1610a8e19663d7b2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.99345946204339, "res": {"Yes": 0.99345946204339, "No": 0.006540484720337156}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9845357270732052, "res": {"Yes": 0.9845357270732052, "No": 0.015464229442247042}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.994400320978088, "res": {"Yes": 0.994400320978088, "No": 0.005599694143302619}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9872548062560563, "res": {"Yes": 0.9872548062560563, "No": 0.012745156014463367}, "ground_truth": 1}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997768195884542, "res": {"Yes": 0.9997768195884542, "No": 0.00022310389458489593}, "ground_truth": 0}, {"key": "8af047c2d83a91e8b745adfcaa7c282dfe1030a2", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989225165026564, "res": {"Yes": 0.9989225165026564, "No": 0.0010773833518398282}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998281788829392, "res": {"Yes": 0.9998281788829392, "No": 0.00017180284304106963}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999581450616242, "res": {"Yes": 0.999581450616242, "No": 0.0004185109096147357}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997647827679119, "res": {"Yes": 0.9997647827679119, "No": 0.00023519890442679314}, "ground_truth": 1}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983723241035851, "res": {"Yes": 0.9983723241035851, "No": 0.0016276402703116553}, "ground_truth": 0}, {"key": "dfcacd955a9168b2cf599c57a539dbfce43ea2a9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9861332362163501, "res": {"Yes": 0.9861332362163501, "No": 0.01386669995201795}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9609602308102921, "res": {"Yes": 0.9609602308102921, "No": 0.03903970007390215}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985348973123895, "res": {"Yes": 0.9985348973123895, "No": 0.0014650344813974855}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994968953496083, "res": {"Yes": 0.9994968953496083, "No": 0.0005030305772252136}, "ground_truth": 1}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996144669244353, "res": {"Yes": 0.996144669244353, "No": 0.003855375001670464}, "ground_truth": 0}, {"key": "f2463954bc7215d8a1f98cf0d85f280fd2affa58", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995147487424925, "res": {"Yes": 0.9995147487424925, "No": 0.00048517835129292916}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.981230836262301, "res": {"Yes": 0.981230836262301, "No": 0.018769159655574342}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984185430230619, "res": {"Yes": 0.9984185430230619, "No": 0.0015814201603335306}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9914656689374737, "res": {"Yes": 0.9914656689374737, "No": 0.008534316567913536}, "ground_truth": 1}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910911689116504, "res": {"Yes": 0.9910911689116504, "No": 0.008908746785262047}, "ground_truth": 0}, {"key": "0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9885183564883819, "res": {"Yes": 0.9885183564883819, "No": 0.011481491212124684}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9962784773940453, "res": {"Yes": 0.9962784773940453, "No": 0.003721553836219877}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.99389205033077, "res": {"Yes": 0.99389205033077, "No": 0.006107903443213293}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9584415490941244, "res": {"Yes": 0.9584415490941244, "No": 0.041558342318996536}, "ground_truth": 1}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998490324867421, "res": {"Yes": 0.9998490324867421, "No": 0.00015087332206004556}, "ground_truth": 0}, {"key": "2905a15c5947042e42f4b52dc201d24822af20f1", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977321784018386, "res": {"Yes": 0.9977321784018386, "No": 0.002267750291336093}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9989747442342014, "res": {"Yes": 0.9989747442342014, "No": 0.0010252610662925543}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996847256294127, "res": {"Yes": 0.9996847256294127, "No": 0.0003151442775598785}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999645650926468, "res": {"Yes": 0.999645650926468, "No": 0.0003543030278936344}, "ground_truth": 1}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981090910252479, "res": {"Yes": 0.9981090910252479, "No": 0.0018909063775949743}, "ground_truth": 0}, {"key": "808a3d788317af05686ee71a71d2fbc5c19267bc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998937207730372, "res": {"Yes": 0.9998937207730372, "No": 0.00010618974679539921}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9688189855108915, "res": {"Yes": 0.9688189855108915, "No": 0.031180910947997922}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994703413187088, "res": {"Yes": 0.9994703413187088, "No": 0.0005296103555508879}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972730308537927, "res": {"Yes": 0.9972730308537927, "No": 0.002727003204550539}, "ground_truth": 1}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972821590353469, "res": {"Yes": 0.9972821590353469, "No": 0.002717858567802024}, "ground_truth": 0}, {"key": "5a1bec30205b6938392f3119366e27d15b4849e8", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9880483215290845, "res": {"Yes": 0.9880483215290845, "No": 0.011951545242123395}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9948286357827517, "res": {"Yes": 0.9948286357827517, "No": 0.005171404313377928}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999546073575787, "res": {"Yes": 0.999546073575787, "No": 0.00045390773838095537}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996612574862052, "res": {"Yes": 0.9996612574862052, "No": 0.0003386269157963407}, "ground_truth": 1}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991023985853745, "res": {"Yes": 0.9991023985853745, "No": 0.0008975446406078006}, "ground_truth": 0}, {"key": "7ff39e1220c0f084434786ed145e15c6abe48def", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994647453399125, "res": {"Yes": 0.9994647453399125, "No": 0.0005352088036576228}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9044960275774923, "res": {"Yes": 0.9044960275774923, "No": 0.09550389948742612}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9921221095185606, "res": {"Yes": 0.9921221095185606, "No": 0.007877807105068085}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960780851754796, "res": {"Yes": 0.9960780851754796, "No": 0.003921925683541813}, "ground_truth": 1}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986962092746093, "res": {"Yes": 0.9986962092746093, "No": 0.0013037611492604361}, "ground_truth": 0}, {"key": "7551503e7e57bd519913b0df90ca1e80d5305b05", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978487251158656, "res": {"Yes": 0.9978487251158656, "No": 0.002151224637855381}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9727405664305159, "res": {"Yes": 0.9727405664305159, "No": 0.02725930904173383}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949682269967357, "res": {"Yes": 0.9949682269967357, "No": 0.005031757260081662}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967823198643089, "res": {"Yes": 0.9967823198643089, "No": 0.0032176229703542772}, "ground_truth": 1}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982101635710461, "res": {"Yes": 0.9982101635710461, "No": 0.0017898132395852996}, "ground_truth": 0}, {"key": "c34a0cbf6db61323766fc86b1b89b399ff6b0666", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968968706894346, "res": {"Yes": 0.9968968706894346, "No": 0.003103071039392988}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9955909047264949, "res": {"Yes": 0.9955909047264949, "No": 0.004409063369276727}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981467346209733, "res": {"Yes": 0.9981467346209733, "No": 0.00185323596062334}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993592553146048, "res": {"Yes": 0.9993592553146048, "No": 0.0006406693940559134}, "ground_truth": 1}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997240438828956, "res": {"Yes": 0.9997240438828956, "No": 0.0002758975380838356}, "ground_truth": 0}, {"key": "d07a61368a195444b14bd4b382e0fc3d64890b22", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9882386320332635, "res": {"Yes": 0.9882386320332635, "No": 0.011761332912909503}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5702629720905519, "res": {"Yes": 0.5702629720905519, "No": 0.42973682179762457}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9861958404066323, "res": {"Yes": 0.9861958404066323, "No": 0.013804032767233926}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9857604492265848, "res": {"Yes": 0.9857604492265848, "No": 0.014239516887691222}, "ground_truth": 1}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967452502191475, "res": {"Yes": 0.9967452502191475, "No": 0.0032547969051481104}, "ground_truth": 0}, {"key": "aa68dee4bc246e5aae9507342f8520a2a301a310", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926672110514623, "res": {"Yes": 0.9926672110514623, "No": 0.007332707198206035}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9963195419499511, "res": {"Yes": 0.9963195419499511, "No": 0.0036804950919295965}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9811672516206787, "res": {"Yes": 0.9811672516206787, "No": 0.01883270486602906}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9871048334045263, "res": {"Yes": 0.9871048334045263, "No": 0.012895098513647948}, "ground_truth": 1}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939810790178942, "res": {"Yes": 0.9939810790178942, "No": 0.006018888928905766}, "ground_truth": 0}, {"key": "87ef15064035db73cb29f5cec2f0442f2c527ef0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962228661834353, "res": {"Yes": 0.9962228661834353, "No": 0.0037771376184531805}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9990838415896174, "res": {"Yes": 0.9990838415896174, "No": 0.0009161348389772249}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999963211539712, "res": {"Yes": 0.999963211539712, "No": 3.674317734561734e-05}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998635732369773, "res": {"Yes": 0.9998635732369773, "No": 0.00013639560970314668}, "ground_truth": 1}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998116162264519, "res": {"Yes": 0.9998116162264519, "No": 0.00018826970916593485}, "ground_truth": 0}, {"key": "a577b934e210ef7dce07390711f03db8bb2e005a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999721898777316, "res": {"Yes": 0.999721898777316, "No": 0.0002779937295787881}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8665771737761322, "res": {"Yes": 0.8665771737761322, "No": 0.13342269934082246}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959620682590646, "res": {"Yes": 0.9959620682590646, "No": 0.004037885964821679}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972011795215001, "res": {"Yes": 0.9972011795215001, "No": 0.002798775161019203}, "ground_truth": 1}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995427374164156, "res": {"Yes": 0.9995427374164156, "No": 0.00045714176833581993}, "ground_truth": 0}, {"key": "c9414bf55859cb9ca7e925a4a09b416b6f66446a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984619185615334, "res": {"Yes": 0.9984619185615334, "No": 0.001538050459375906}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9893346801760207, "res": {"Yes": 0.9893346801760207, "No": 0.010665261584290278}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9823927062859639, "res": {"Yes": 0.9823927062859639, "No": 0.0176072769380649}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966079984794313, "res": {"Yes": 0.9966079984794313, "No": 0.0033919998678900174}, "ground_truth": 1}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.978444858329132, "res": {"Yes": 0.978444858329132, "No": 0.021555154566782586}, "ground_truth": 0}, {"key": "f12cda150b96acfa1499b2afb0a81029545abadf", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998473675595163, "res": {"Yes": 0.9998473675595163, "No": 0.00015255246321446976}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998722739576655, "res": {"Yes": 0.9998722739576655, "No": 0.00012770112311696096}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918160563078682, "res": {"Yes": 0.9918160563078682, "No": 0.008183789267436889}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984256729550195, "res": {"Yes": 0.9984256729550195, "No": 0.0015743471509024323}, "ground_truth": 1}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994000838217127, "res": {"Yes": 0.9994000838217127, "No": 0.0005999021496286567}, "ground_truth": 0}, {"key": "7359741b0e60b4402f069963a26c7aa766f45e7c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992203387938686, "res": {"Yes": 0.9992203387938686, "No": 0.0007796571392357963}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9370343011466836, "res": {"Yes": 0.9370343011466836, "No": 0.06296518023910223}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9942824477731566, "res": {"Yes": 0.9942824477731566, "No": 0.005717524196896204}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.987873437634442, "res": {"Yes": 0.987873437634442, "No": 0.012126448322666579}, "ground_truth": 1}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990559886093725, "res": {"Yes": 0.9990559886093725, "No": 0.0009439965938830672}, "ground_truth": 0}, {"key": "b10c71f49dbeabad23325d625e8e7b8529429607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979398906441921, "res": {"Yes": 0.9979398906441921, "No": 0.002060078067611357}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9995796633062313, "res": {"Yes": 0.9995796633062313, "No": 0.0004202747054063045}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9832511304138918, "res": {"Yes": 0.9832511304138918, "No": 0.01674882788792255}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996012267012159, "res": {"Yes": 0.9996012267012159, "No": 0.0003987373613665552}, "ground_truth": 1}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972381760723763, "res": {"Yes": 0.9972381760723763, "No": 0.002761843189125881}, "ground_truth": 0}, {"key": "c5a1e04f5f84d406ffedfc2f312c314d1bc03c46", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998722724511599, "res": {"Yes": 0.998722724511599, "No": 0.0012772427832428758}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9856217043365372, "res": {"Yes": 0.9856217043365372, "No": 0.014378273449449617}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987403221610398, "res": {"Yes": 0.9987403221610398, "No": 0.0012597004807091913}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975194548492906, "res": {"Yes": 0.9975194548492906, "No": 0.0024805481872146205}, "ground_truth": 1}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984801070090432, "res": {"Yes": 0.9984801070090432, "No": 0.0015198980784856628}, "ground_truth": 0}, {"key": "8a5035d3527c4db4357dd1db9fb2712008dccea9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985213499817778, "res": {"Yes": 0.9985213499817778, "No": 0.0014786696212853493}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9984231809874922, "res": {"Yes": 0.9984231809874922, "No": 0.0015768195557931712}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986278541541499, "res": {"Yes": 0.9986278541541499, "No": 0.00137214880102739}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994178273782804, "res": {"Yes": 0.9994178273782804, "No": 0.0005820811424114204}, "ground_truth": 1}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996130158676828, "res": {"Yes": 0.9996130158676828, "No": 0.00038686916385104}, "ground_truth": 0}, {"key": "3f33d4cb25cd121619ba99eeb02709b72716d622", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996699528075618, "res": {"Yes": 0.9996699528075618, "No": 0.000329966309876088}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9982630278851994, "res": {"Yes": 0.9982630278851994, "No": 0.0017369174800833504}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998306817359556, "res": {"Yes": 0.9998306817359556, "No": 0.0001691886827245381}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997957652387589, "res": {"Yes": 0.9997957652387589, "No": 0.00020421606141774815}, "ground_truth": 1}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999043288267786, "res": {"Yes": 0.9999043288267786, "No": 9.558219500215103e-05}, "ground_truth": 0}, {"key": "e6f451d6667f142a64818a9d05dbc49115e7cbf9", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9936102087985491, "res": {"Yes": 0.9936102087985491, "No": 0.006389775143643615}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998978924633294, "res": {"Yes": 0.9998978924633294, "No": 0.00010197335543802863}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998943167248708, "res": {"Yes": 0.9998943167248708, "No": 0.00010563709182738408}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998611895050684, "res": {"Yes": 0.9998611895050684, "No": 0.0001386848451682463}, "ground_truth": 1}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999759659438225, "res": {"Yes": 0.9999759659438225, "No": 2.392625898158315e-05}, "ground_truth": 0}, {"key": "39622c01625247af942a6439699ff529db1d54d0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952753932336688, "res": {"Yes": 0.9952753932336688, "No": 0.004724625407571543}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9835945721561783, "res": {"Yes": 0.9835945721561783, "No": 0.016405370963389832}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973747669670409, "res": {"Yes": 0.9973747669670409, "No": 0.0026251732109281137}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980256173627083, "res": {"Yes": 0.9980256173627083, "No": 0.001974363916695254}, "ground_truth": 1}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999704622872416, "res": {"Yes": 0.999704622872416, "No": 0.0002953616559847234}, "ground_truth": 0}, {"key": "02927c88776bcc84fc883435a5037b4c8ca70e83", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998702477485516, "res": {"Yes": 0.9998702477485516, "No": 0.00012969305690929637}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.998590047618161, "res": {"Yes": 0.998590047618161, "No": 0.0014098856178145183}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999923042191404, "res": {"Yes": 0.999923042191404, "No": 7.684193793414876e-05}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998847814881626, "res": {"Yes": 0.9998847814881626, "No": 0.00011515471338143905}, "ground_truth": 1}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995596530611845, "res": {"Yes": 0.9995596530611845, "No": 0.0004402287851225192}, "ground_truth": 0}, {"key": "3ee31d8987c07ba4ea9423d39c5e333525316958", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999450934134217, "res": {"Yes": 0.9999450934134217, "No": 5.477646100472649e-05}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9810618033935928, "res": {"Yes": 0.9810618033935928, "No": 0.018938236601881632}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952970018923032, "res": {"Yes": 0.9952970018923032, "No": 0.004702951446569054}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952216687227311, "res": {"Yes": 0.9952216687227311, "No": 0.004778273453139961}, "ground_truth": 1}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984965065803889, "res": {"Yes": 0.9984965065803889, "No": 0.001503460241273612}, "ground_truth": 0}, {"key": "f8ddc6d36f378757d27a244529f8fe11672baa41", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.99951713165846, "res": {"Yes": 0.99951713165846, "No": 0.00048277763178628306}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.989047610972443, "res": {"Yes": 0.989047610972443, "No": 0.010952297799836797}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9778239396116509, "res": {"Yes": 0.9778239396116509, "No": 0.022176067769397265}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974308574102737, "res": {"Yes": 0.9974308574102737, "No": 0.0025691273249900705}, "ground_truth": 1}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9952208416938679, "res": {"Yes": 0.9952208416938679, "No": 0.00477910685670087}, "ground_truth": 0}, {"key": "4e97228ceef1be54ab9151fc6759017e6a54a18d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9959931648714021, "res": {"Yes": 0.9959931648714021, "No": 0.004006801447363263}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9965890599182617, "res": {"Yes": 0.9965890599182617, "No": 0.0034109978699868033}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999265807051635, "res": {"Yes": 0.999265807051635, "No": 0.0007341347229051482}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987864537435287, "res": {"Yes": 0.9987864537435287, "No": 0.0012134818374358312}, "ground_truth": 1}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987812189176857, "res": {"Yes": 0.9987812189176857, "No": 0.0012187689342232166}, "ground_truth": 0}, {"key": "0e649ccabaf0e650e9757ee256c2749d37486ba6", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938057364427465, "res": {"Yes": 0.9938057364427465, "No": 0.0061943047363844555}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9970461631859309, "res": {"Yes": 0.9970461631859309, "No": 0.0029538437223886717}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9815821748805107, "res": {"Yes": 0.9815821748805107, "No": 0.018417853240121504}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980966129430131, "res": {"Yes": 0.9980966129430131, "No": 0.001903366725672614}, "ground_truth": 1}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999353192918872, "res": {"Yes": 0.9999353192918872, "No": 6.459600443112123e-05}, "ground_truth": 0}, {"key": "9eccfcc231c7c0d65cb0c333cb3d1731a5fe2f3f", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997610883439361, "res": {"Yes": 0.9997610883439361, "No": 0.00023885186736724478}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9936690637633762, "res": {"Yes": 0.9936690637633762, "No": 0.006330924518596922}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994190187350116, "res": {"Yes": 0.9994190187350116, "No": 0.0005809836166772795}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998312083236526, "res": {"Yes": 0.998312083236526, "No": 0.001687907977488958}, "ground_truth": 1}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998951510670336, "res": {"Yes": 0.9998951510670336, "No": 0.00010481465060485594}, "ground_truth": 0}, {"key": "ddf109b3de6022a4ecd62ee2cd31d2c2ac51035b", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969272013393631, "res": {"Yes": 0.9969272013393631, "No": 0.0030728331278282043}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9452433191961467, "res": {"Yes": 0.9452433191961467, "No": 0.05475636288284164}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9823430073137311, "res": {"Yes": 0.9823430073137311, "No": 0.017656964220976932}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938603663523518, "res": {"Yes": 0.9938603663523518, "No": 0.006139620785804607}, "ground_truth": 1}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986296288172863, "res": {"Yes": 0.9986296288172863, "No": 0.0013703520518506956}, "ground_truth": 0}, {"key": "bd92eab23374d95ac4430e2e1a8bf7561c662309", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999397701194789, "res": {"Yes": 0.999397701194789, "No": 0.0006022324846983366}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9950483679499958, "res": {"Yes": 0.9950483679499958, "No": 0.0049516332862853335}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9925449368723959, "res": {"Yes": 0.9925449368723959, "No": 0.007455024830897673}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953254643099786, "res": {"Yes": 0.9953254643099786, "No": 0.004674558309295579}, "ground_truth": 1}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965900028912763, "res": {"Yes": 0.9965900028912763, "No": 0.0034099327816742033}, "ground_truth": 0}, {"key": "d84440effc748f9220c7dde068962e803f566bc7", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979519990217244, "res": {"Yes": 0.9979519990217244, "No": 0.002047911675361041}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9945972207945938, "res": {"Yes": 0.9945972207945938, "No": 0.00540276348409837}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998280596834308, "res": {"Yes": 0.9998280596834308, "No": 0.00017184940792078993}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982965158789399, "res": {"Yes": 0.9982965158789399, "No": 0.0017034703948978077}, "ground_truth": 1}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996654245628489, "res": {"Yes": 0.9996654245628489, "No": 0.00033445473798599207}, "ground_truth": 0}, {"key": "e2866f596c85b0191ce812260d23855c70a01192", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993692510657952, "res": {"Yes": 0.9993692510657952, "No": 0.0006306446549051068}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9910882412326616, "res": {"Yes": 0.9910882412326616, "No": 0.008911678245153114}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9941074757237687, "res": {"Yes": 0.9941074757237687, "No": 0.005892560240242798}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953499147803211, "res": {"Yes": 0.9953499147803211, "No": 0.004650056076468479}, "ground_truth": 1}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9945215165267829, "res": {"Yes": 0.9945215165267829, "No": 0.005478516096507634}, "ground_truth": 0}, {"key": "4ebd9195bc8c12272b6281a5a610a7b5fb49b8cd", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991145428487468, "res": {"Yes": 0.9991145428487468, "No": 0.0008853879034523166}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9999651187283657, "res": {"Yes": 0.9999651187283657, "No": 3.479509950227159e-05}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998253184785939, "res": {"Yes": 0.9998253184785939, "No": 0.00017465307154616096}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999760851449647, "res": {"Yes": 0.9999760851449647, "No": 2.3856448495629585e-05}, "ground_truth": 1}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997876610970966, "res": {"Yes": 0.9997876610970966, "No": 0.00021222298924985873}, "ground_truth": 0}, {"key": "4bd37aa7004cd1fdf1ccb506d2159a2cf26995e0", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999689611440499, "res": {"Yes": 0.999689611440499, "No": 0.000310381925827215}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9977239939385749, "res": {"Yes": 0.9977239939385749, "No": 0.002275972830908451}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996659012234302, "res": {"Yes": 0.9996659012234302, "No": 0.0003340329223283177}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953741290517633, "res": {"Yes": 0.9953741290517633, "No": 0.0046258427888612}, "ground_truth": 1}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983142214236357, "res": {"Yes": 0.9983142214236357, "No": 0.0016857434430345615}, "ground_truth": 0}, {"key": "29b87dc549b8c64b35517e684e79b1da6316bd88", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981734512742034, "res": {"Yes": 0.9981734512742034, "No": 0.0018265445885671423}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9946699728711482, "res": {"Yes": 0.9946699728711482, "No": 0.005329971968561076}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952752743977938, "res": {"Yes": 0.9952752743977938, "No": 0.004724715176307087}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992159316023927, "res": {"Yes": 0.9992159316023927, "No": 0.0007840082615327994}, "ground_truth": 1}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958514028039542, "res": {"Yes": 0.9958514028039542, "No": 0.004148599796935282}, "ground_truth": 0}, {"key": "d2f2fe0b8797b416bae71305c9d1c874e2fdd0cc", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9645379216412335, "res": {"Yes": 0.9645379216412335, "No": 0.03546197086256535}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9936498770932963, "res": {"Yes": 0.9936498770932963, "No": 0.006350127420870775}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.997366819517612, "res": {"Yes": 0.997366819517612, "No": 0.002633166933457757}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999523644646081, "res": {"Yes": 0.9999523644646081, "No": 4.7554839360904456e-05}, "ground_truth": 1}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997902830136822, "res": {"Yes": 0.9997902830136822, "No": 0.0002096590328280394}, "ground_truth": 0}, {"key": "0269d39abbea3edadd225fa97d818f5a789b4c72", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994225890658914, "res": {"Yes": 0.9994225890658914, "No": 0.0005773447401151263}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9950816535711481, "res": {"Yes": 0.9950816535711481, "No": 0.004918308049502524}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9895463868084022, "res": {"Yes": 0.9895463868084022, "No": 0.010453556746117311}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875429208495065, "res": {"Yes": 0.9875429208495065, "No": 0.012457015759088877}, "ground_truth": 1}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997726484082909, "res": {"Yes": 0.9997726484082909, "No": 0.00022722144212958828}, "ground_truth": 0}, {"key": "394f20c9bee9bc8d0e6a65bac48de57bd9f4733d", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999361536682638, "res": {"Yes": 0.9999361536682638, "No": 6.374447782695132e-05}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8700927620250837, "res": {"Yes": 0.8700927620250837, "No": 0.1299072428161061}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.884161589055411, "res": {"Yes": 0.884161589055411, "No": 0.1158383609155299}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.991559418927214, "res": {"Yes": 0.991559418927214, "No": 0.008440582130033484}, "ground_truth": 1}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994741693250563, "res": {"Yes": 0.994741693250563, "No": 0.005258255296159849}, "ground_truth": 0}, {"key": "6e63c8b1ae01ab91b4832222edaf4bb461b6c568", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.997788080507253, "res": {"Yes": 0.997788080507253, "No": 0.0022119555288994124}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9844628249018517, "res": {"Yes": 0.9844628249018517, "No": 0.015537166291416394}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977078521919708, "res": {"Yes": 0.9977078521919708, "No": 0.0022920817803161013}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9911370783089619, "res": {"Yes": 0.9911370783089619, "No": 0.0088628552792565}, "ground_truth": 1}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998765647587563, "res": {"Yes": 0.9998765647587563, "No": 0.00012340351034716295}, "ground_truth": 0}, {"key": "355875856932fd634aaf7c6b26155403aaf15904", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9875185120527892, "res": {"Yes": 0.9875185120527892, "No": 0.012481393045931564}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9995439289020551, "res": {"Yes": 0.9995439289020551, "No": 0.00045600672534274297}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999396632699302, "res": {"Yes": 0.999396632699302, "No": 0.0006033399835347329}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996277878481368, "res": {"Yes": 0.9996277878481368, "No": 0.0003721122507054739}, "ground_truth": 1}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997562021635237, "res": {"Yes": 0.9997562021635237, "No": 0.0002437642471373947}, "ground_truth": 0}, {"key": "999523913d281ab57977d79b644f3879ac4d2e37", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986555505829857, "res": {"Yes": 0.9986555505829857, "No": 0.0013444463316634905}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9886494264005612, "res": {"Yes": 0.9886494264005612, "No": 0.011350487323332681}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990667031926357, "res": {"Yes": 0.9990667031926357, "No": 0.0009332013231077618}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981198924206542, "res": {"Yes": 0.9981198924206542, "No": 0.001880036664381241}, "ground_truth": 1}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990059110341375, "res": {"Yes": 0.9990059110341375, "No": 0.0009940888191678492}, "ground_truth": 0}, {"key": "5f7424604ec11358dc6fdfa47732f90422949041", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974147241963751, "res": {"Yes": 0.9974147241963751, "No": 0.002585238014548164}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9934575814264084, "res": {"Yes": 0.9934575814264084, "No": 0.006542417719158526}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954272758842171, "res": {"Yes": 0.9954272758842171, "No": 0.004572734016017986}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9902605986827736, "res": {"Yes": 0.9902605986827736, "No": 0.009739368491805027}, "ground_truth": 1}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9927641280953176, "res": {"Yes": 0.9927641280953176, "No": 0.007235831486007836}, "ground_truth": 0}, {"key": "fdb2b65b9cb99b7d52559ae647cae293cb5910d3", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945258734356381, "res": {"Yes": 0.9945258734356381, "No": 0.005474069347061874}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9998176908203386, "res": {"Yes": 0.9998176908203386, "No": 0.00018222944100419313}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991203676535549, "res": {"Yes": 0.9991203676535549, "No": 0.0008795826714206073}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9693912992580396, "res": {"Yes": 0.9693912992580396, "No": 0.03060860807326808}, "ground_truth": 1}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999081429891136, "res": {"Yes": 0.9999081429891136, "No": 9.173413513875187e-05}, "ground_truth": 0}, {"key": "f974fe1b3777ffc5756322c0bb3f31a9b23ec09a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961049365155084, "res": {"Yes": 0.9961049365155084, "No": 0.003895030769294997}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9684249694683215, "res": {"Yes": 0.9684249694683215, "No": 0.03157499718179189}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994963032977977, "res": {"Yes": 0.9994963032977977, "No": 0.0005036502898887262}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99662137892858, "res": {"Yes": 0.99662137892858, "No": 0.003378667087754681}, "ground_truth": 1}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998043462027011, "res": {"Yes": 0.9998043462027011, "No": 0.00019552721129791626}, "ground_truth": 0}, {"key": "2930e1a7ac24bce649bfbf5f5971e74515e38a0a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9901070132533054, "res": {"Yes": 0.9901070132533054, "No": 0.009892909406662724}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9995408383270004, "res": {"Yes": 0.9995408383270004, "No": 0.0004591052827605402}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988242749069718, "res": {"Yes": 0.9988242749069718, "No": 0.0011756489335103609}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969424764746581, "res": {"Yes": 0.9969424764746581, "No": 0.0030574929384151635}, "ground_truth": 1}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9909941066913031, "res": {"Yes": 0.9909941066913031, "No": 0.009005833904799077}, "ground_truth": 0}, {"key": "f5f7571a69e96c1d288d939861b7216c2bdc0ae4", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994178273782804, "res": {"Yes": 0.9994178273782804, "No": 0.0005821410998568443}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9979720666410571, "res": {"Yes": 0.9979720666410571, "No": 0.0020279344829758172}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9915563783149178, "res": {"Yes": 0.9915563783149178, "No": 0.008443548671784833}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987863346881904, "res": {"Yes": 0.9987863346881904, "No": 0.0012136068325022226}, "ground_truth": 1}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984751153199217, "res": {"Yes": 0.9984751153199217, "No": 0.0015248915289650883}, "ground_truth": 0}, {"key": "5f02aa32bd1dc95e47355755398e31550b232f8a", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9935884353610858, "res": {"Yes": 0.9935884353610858, "No": 0.006411583517160363}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9950848415193483, "res": {"Yes": 0.9950848415193483, "No": 0.00491515494969989}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985675829899856, "res": {"Yes": 0.9985675829899856, "No": 0.0014323511844106373}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971397786495528, "res": {"Yes": 0.9971397786495528, "No": 0.0028602524180083794}, "ground_truth": 1}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995471459193763, "res": {"Yes": 0.9995471459193763, "No": 0.0004527288395031521}, "ground_truth": 0}, {"key": "9177e5ac94f038749e8d4eb526a65461e0f6df4c", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984352986236468, "res": {"Yes": 0.9984352986236468, "No": 0.0015646689670747987}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9408506294208479, "res": {"Yes": 0.9408506294208479, "No": 0.059148845909449424}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9923959306369042, "res": {"Yes": 0.9923959306369042, "No": 0.007603557873376667}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99869977712319, "res": {"Yes": 0.99869977712319, "No": 0.001300224418237405}, "ground_truth": 1}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981233391342179, "res": {"Yes": 0.9981233391342179, "No": 0.0018766475726162658}, "ground_truth": 0}, {"key": "f12e4bbb07211de7d43b4e331dc73404aa804562", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994942778505762, "res": {"Yes": 0.9994942778505762, "No": 0.000505676510719019}, "ground_truth": 0}]