[{"key": "33773576", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.010880179930079023, "res": {"No": 0.9891196874760974, "Yes": 0.010880179930079023}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999696483206215, "res": {"Yes": 0.9999696483206215, "No": 3.0228813470783267e-05}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998740617908821, "res": {"Yes": 0.9998740617908821, "No": 0.00012586734790435278}, "ground_truth": 1}, {"key": "33773576", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999304322739465, "res": {"Yes": 0.9999304322739465, "No": 6.943975610021806e-05}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994525968198198, "res": {"Yes": 0.9994525968198198, "No": 0.0005473971323747139}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.174382171564604, "res": {"No": 0.8256172752455301, "Yes": 0.174382171564604}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999822696470129, "res": {"Yes": 0.999822696470129, "No": 0.00017720469936968462}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997831359983979, "res": {"Yes": 0.9997831359983979, "No": 0.00021674977074443157}, "ground_truth": 1}, {"key": "37642631", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956898126352852, "res": {"Yes": 0.9956898126352852, "No": 0.004310232206929532}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965162471972829, "res": {"Yes": 0.9965162471972829, "No": 0.003483729024990067}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.20203512713324057, "res": {"No": 0.7979647224225875, "Yes": 0.20203512713324057}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999800187796273, "res": {"Yes": 0.9999800187796273, "No": 1.98781980935752e-05}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998210279174143, "res": {"Yes": 0.9998210279174143, "No": 0.00017890457407520707}, "ground_truth": 1}, {"key": "36609836", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998667913030664, "res": {"Yes": 0.9998667913030664, "No": 0.00013319118243294907}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999650532877865, "res": {"Yes": 0.999650532877865, "No": 0.0003493378683212572}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8971449820957014, "res": {"Yes": 0.8971449820957014, "No": 0.10285497424281752}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986620833096368, "res": {"Yes": 0.9986620833096368, "No": 0.0013378652932968555}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9724510975981493, "res": {"Yes": 0.9724510975981493, "No": 0.027548769737677416}, "ground_truth": 1}, {"key": "41035610", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956195014947419, "res": {"Yes": 0.9956195014947419, "No": 0.004380438699089263}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962907773247976, "res": {"Yes": 0.9962907773247976, "No": 0.003709155708848944}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9972611686608214, "res": {"Yes": 0.9972611686608214, "No": 0.002738813723387105}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999844292352256, "res": {"Yes": 0.9999844292352256, "No": 1.5518309082726945e-05}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.487177110171837e-06}, "ground_truth": 1}, {"key": "37592684", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 3.2437970023825198e-06}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9904778465697623, "res": {"Yes": 0.9904778465697623, "No": 0.009522051116231015}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9942181088434194, "res": {"Yes": 0.9942181088434194, "No": 0.00578189844230523}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986348629484767, "res": {"Yes": 0.9986348629484767, "No": 0.0013650479810644873}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980327409955805, "res": {"Yes": 0.9980327409955805, "No": 0.001967258361910648}, "ground_truth": 1}, {"key": "38951040", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991386943768117, "res": {"Yes": 0.9991386943768117, "No": 0.0008612148062859434}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995827613082826, "res": {"Yes": 0.9995827613082826, "No": 0.0004171915871790938}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5502532787159734, "res": {"Yes": 0.5502532787159734, "No": 0.44974653970494616}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9741369865049814, "res": {"Yes": 0.9741369865049814, "No": 0.025862904386896146}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9680336082903969, "res": {"Yes": 0.9680336082903969, "No": 0.0319663337598888}, "ground_truth": 1}, {"key": "40774469", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9716295291741835, "res": {"Yes": 0.9716295291741835, "No": 0.028370341297731198}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9656655985249096, "res": {"Yes": 0.9656655985249096, "No": 0.03433440041002295}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.15247384589053947, "res": {"No": 0.8475257980617241, "Yes": 0.15247384589053947}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954667334125822, "res": {"Yes": 0.9954667334125822, "No": 0.004533315539618942}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984468250266407, "res": {"Yes": 0.9984468250266407, "No": 0.001553185493345568}, "ground_truth": 1}, {"key": "40876288", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9942377843162132, "res": {"Yes": 0.9942377843162132, "No": 0.00576223994804265}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9894916317740371, "res": {"Yes": 0.9894916317740371, "No": 0.010508312167461906}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.2569725266488957, "res": {"No": 0.7430273683624764, "Yes": 0.2569725266488957}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9817528907907107, "res": {"Yes": 0.9817528907907107, "No": 0.018247120358115095}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955276877067657, "res": {"Yes": 0.9955276877067657, "No": 0.004472340525038562}, "ground_truth": 1}, {"key": "40340131", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932706726155662, "res": {"Yes": 0.9932706726155662, "No": 0.006729341402967836}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7185593674107436, "res": {"Yes": 0.7185593674107436, "No": 0.28144019644318724}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8678091255011822, "res": {"Yes": 0.8678091255011822, "No": 0.1321903169114735}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9785882188299098, "res": {"Yes": 0.9785882188299098, "No": 0.02141160530448669}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997464409015432, "res": {"Yes": 0.9997464409015432, "No": 0.00025354379208762765}, "ground_truth": 1}, {"key": "30121591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982956829006745, "res": {"Yes": 0.9982956829006745, "No": 0.0017043209796072205}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9929519230202882, "res": {"Yes": 0.9929519230202882, "No": 0.00704803799127692}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8870159720623599, "res": {"Yes": 0.8870159720623599, "No": 0.11298370208591864}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980947202542005, "res": {"Yes": 0.9980947202542005, "No": 0.001905244370791051}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9478982317161349, "res": {"Yes": 0.9478982317161349, "No": 0.05210165317083631}, "ground_truth": 1}, {"key": "35623366", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996050323502665, "res": {"Yes": 0.9996050323502665, "No": 0.0003949572605723382}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924020385549347, "res": {"Yes": 0.9924020385549347, "No": 0.007597864940798904}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9781036235597853, "res": {"Yes": 0.9781036235597853, "No": 0.021896313273650384}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999716747231683, "res": {"Yes": 0.9999716747231683, "No": 2.8235457829704337e-05}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999934886141991, "res": {"Yes": 0.9999934886141991, "No": 6.3869289985735075e-06}, "ground_truth": 1}, {"key": "41014093", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987900219145083, "res": {"Yes": 0.9987900219145083, "No": 0.001209885683842336}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997357190083753, "res": {"Yes": 0.9997357190083753, "No": 0.00026426240239281983}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.27985692179608873, "res": {"No": 0.72014295360947, "Yes": 0.27985692179608873}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977801262721592, "res": {"Yes": 0.9977801262721592, "No": 0.002219807052862752}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9387693815422593, "res": {"Yes": 0.9387693815422593, "No": 0.061230570267787526}, "ground_truth": 1}, {"key": "11387984", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9784800095920683, "res": {"Yes": 0.9784800095920683, "No": 0.021520020308187254}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961418266499806, "res": {"Yes": 0.9961418266499806, "No": 0.0038581734769357204}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.024338610016920236, "res": {"No": 0.97566123803181, "Yes": 0.024338610016920236}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981829591212436, "res": {"Yes": 0.9981829591212436, "No": 0.0018170548552732672}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950244028966686, "res": {"Yes": 0.9950244028966686, "No": 0.004975541848654008}, "ground_truth": 1}, {"key": "39508312", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9900824752402126, "res": {"Yes": 0.9900824752402126, "No": 0.00991748418731049}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994717709621076, "res": {"Yes": 0.9994717709621076, "No": 0.000528132372598739}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.031076446313421734, "res": {"No": 0.9689233910700407, "Yes": 0.031076446313421734}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9865366955421824, "res": {"Yes": 0.9865366955421824, "No": 0.013463270568428382}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9920275340337975, "res": {"Yes": 0.9920275340337975, "No": 0.007972373709266073}, "ground_truth": 1}, {"key": "35815369", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960965389878489, "res": {"Yes": 0.9960965389878489, "No": 0.0039035054352525174}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9729595501134579, "res": {"Yes": 0.9729595501134579, "No": 0.027040371202459276}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.10561593705665555, "res": {"No": 0.8943839426064598, "Yes": 0.10561593705665555}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978746011655819, "res": {"Yes": 0.9978746011655819, "No": 0.0021252845068312127}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9926117651875888, "res": {"Yes": 0.9926117651875888, "No": 0.007387917377836315}, "ground_truth": 1}, {"key": "35802823", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996275749487681, "res": {"Yes": 0.996275749487681, "No": 0.003724099132588114}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988637646048254, "res": {"Yes": 0.9988637646048254, "No": 0.001136111627348178}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9414022838533809, "res": {"Yes": 0.9414022838533809, "No": 0.05859741476561896}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.655508112372928, "res": {"Yes": 0.655508112372928, "No": 0.3444915903506884}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994113977942826, "res": {"Yes": 0.9994113977942826, "No": 0.0005885276196969389}, "ground_truth": 1}, {"key": "38499968", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9937761876585577, "res": {"Yes": 0.9937761876585577, "No": 0.006223822569397131}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.99286424923634, "res": {"Yes": 0.99286424923634, "No": 0.007135685193058913}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6688447824823867, "res": {"Yes": 0.6688447824823867, "No": 0.3311552103720279}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989899721218798, "res": {"Yes": 0.9989899721218798, "No": 0.0010099433891968444}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9677113290223788, "res": {"Yes": 0.9677113290223788, "No": 0.03228862105954855}, "ground_truth": 1}, {"key": "36926726", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984375600821592, "res": {"Yes": 0.9984375600821592, "No": 0.0015623882482991753}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.965179713862798, "res": {"Yes": 0.965179713862798, "No": 0.03482019306497296}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.03000526238108973, "res": {"No": 0.9699946198950877, "Yes": 0.03000526238108973}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9943835803895986, "res": {"Yes": 0.9943835803895986, "No": 0.005616368490533063}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987888313575117, "res": {"Yes": 0.9987888313575117, "No": 0.0012110797038747438}, "ground_truth": 1}, {"key": "40903712", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993081767201559, "res": {"Yes": 0.9993081767201559, "No": 0.0006917956150127591}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9351600480098692, "res": {"Yes": 0.9351600480098692, "No": 0.06483990158038937}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7691701289037923, "res": {"Yes": 0.7691701289037923, "No": 0.2308296237765443}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997709799291011, "res": {"Yes": 0.9997709799291011, "No": 0.00022891220779274078}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977288592517216, "res": {"Yes": 0.9977288592517216, "No": 0.0022711022333161864}, "ground_truth": 1}, {"key": "19614862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995140338598144, "res": {"Yes": 0.9995140338598144, "No": 0.0004858393715056145}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999791843696483, "res": {"Yes": 0.9999791843696483, "No": 2.0703816556038987e-05}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 6.303357733382004e-05, "res": {"No": 0.9999368688428554, "Yes": 6.303357733382004e-05}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978600033648174, "res": {"Yes": 0.9978600033648174, "No": 0.0021399945428964852}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999458085984071, "res": {"Yes": 0.9999458085984071, "No": 5.4145374949138684e-05}, "ground_truth": 1}, {"key": "38861704", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999819260003368, "res": {"Yes": 0.9999819260003368, "No": 1.800973703388296e-05}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989617754284589, "res": {"Yes": 0.9989617754284589, "No": 0.0010381603774078745}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9186427499660036, "res": {"Yes": 0.9186427499660036, "No": 0.08135708958045267}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991040658888485, "res": {"Yes": 0.9991040658888485, "No": 0.0008958594689322413}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969933099732835, "res": {"Yes": 0.9969933099732835, "No": 0.0030066974357750875}, "ground_truth": 1}, {"key": "34349607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985452272093335, "res": {"Yes": 0.9985452272093335, "No": 0.0014547139436352388}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996302901995175, "res": {"Yes": 0.9996302901995175, "No": 0.0003696063639003043}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9945424985663581, "res": {"Yes": 0.9945424985663581, "No": 0.005457480732549518}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990974037852385, "res": {"Yes": 0.9990974037852385, "No": 0.0009024976244040336}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990833652665186, "res": {"Yes": 0.9990833652665186, "No": 0.0009165572745019001}, "ground_truth": 1}, {"key": "20773800", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998733466512126, "res": {"Yes": 0.9998733466512126, "No": 0.00012655810844140603}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.996685792948813, "res": {"Yes": 0.996685792948813, "No": 0.0033142265527097967}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9731745784724504, "res": {"Yes": 0.9731745784724504, "No": 0.02682522239265555}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.998997467072261, "res": {"Yes": 0.998997467072261, "No": 0.0010024887708433995}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998820401278129, "res": {"Yes": 0.9998820401278129, "No": 0.00011784610259785106}, "ground_truth": 1}, {"key": "35545608", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993640131254411, "res": {"Yes": 0.9993640131254411, "No": 0.0006359099453572221}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997460833622874, "res": {"Yes": 0.9997460833622874, "No": 0.00025382157459813225}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8723924316615571, "res": {"Yes": 0.8723924316615571, "No": 0.12760761897737166}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995246378604004, "res": {"Yes": 0.9995246378604004, "No": 0.0004752493235276046}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999760851449647, "res": {"Yes": 0.9999760851449647, "No": 2.3814712404924755e-05}, "ground_truth": 1}, {"key": "37258984", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998837087823479, "res": {"Yes": 0.9998837087823479, "No": 0.0001162558723423191}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992523580236433, "res": {"Yes": 0.9992523580236433, "No": 0.0007475595427641654}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9993441336344336, "res": {"Yes": 0.9993441336344336, "No": 0.0006558350164273509}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999583118759142, "res": {"Yes": 0.999583118759142, "No": 0.00041685463252466554}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998215150242195, "res": {"Yes": 0.998215150242195, "No": 0.001784872669336404}, "ground_truth": 1}, {"key": "37274562", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995583423894487, "res": {"Yes": 0.9995583423894487, "No": 0.00044156867533246506}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996874664287948, "res": {"Yes": 0.9996874664287948, "No": 0.00031243409719625777}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9613773098070789, "res": {"Yes": 0.9613773098070789, "No": 0.03862224010150044}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9561817920122869, "res": {"Yes": 0.9561817920122869, "No": 0.043817771582449046}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9799430051685858, "res": {"Yes": 0.9799430051685858, "No": 0.020056545894167038}, "ground_truth": 1}, {"key": "40828068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9884610502805787, "res": {"Yes": 0.9884610502805787, "No": 0.011538869341984224}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9540315374085651, "res": {"Yes": 0.9540315374085651, "No": 0.04596809086711331}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9851691191296521, "res": {"Yes": 0.9851691191296521, "No": 0.01483075817098272}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.997762916708957, "res": {"Yes": 0.997762916708957, "No": 0.002237068445359749}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9946612466697538, "res": {"Yes": 0.9946612466697538, "No": 0.005338695202329379}, "ground_truth": 1}, {"key": "37807180", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.988469897046568, "res": {"Yes": 0.988469897046568, "No": 0.011529953242499294}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9578674296483518, "res": {"Yes": 0.9578674296483518, "No": 0.04213233840591342}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9893682854778346, "res": {"Yes": 0.9893682854778346, "No": 0.01063158278268329}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9901284017760223, "res": {"Yes": 0.9901284017760223, "No": 0.009871553912357228}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987759986015313, "res": {"Yes": 0.9987759986015313, "No": 0.0012239297381645249}, "ground_truth": 1}, {"key": "40748607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9937713596074332, "res": {"Yes": 0.9937713596074332, "No": 0.006228662227369721}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996942589483242, "res": {"Yes": 0.9996942589483242, "No": 0.0003056704491243088}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8894450180362704, "res": {"Yes": 0.8894450180362704, "No": 0.11055473956567412}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982988922267946, "res": {"Yes": 0.9982988922267946, "No": 0.0017010587970480403}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9959028238232239, "res": {"Yes": 0.9959028238232239, "No": 0.004097226490130277}, "ground_truth": 1}, {"key": "40123819", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8856402900377381, "res": {"Yes": 0.8856402900377381, "No": 0.11435962393096341}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990252891395349, "res": {"Yes": 0.9990252891395349, "No": 0.0009746277058887777}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.19226489429908244, "res": {"No": 0.8077349552127848, "Yes": 0.19226489429908244}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.993397705555498, "res": {"Yes": 0.993397705555498, "No": 0.006602218830555208}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9801372040348376, "res": {"Yes": 0.9801372040348376, "No": 0.01986276510892637}, "ground_truth": 1}, {"key": "38453867", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9762402119988888, "res": {"Yes": 0.9762402119988888, "No": 0.02375970976120029}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982864229525259, "res": {"Yes": 0.9982864229525259, "No": 0.0017135157928018852}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8970033879066347, "res": {"Yes": 0.8970033879066347, "No": 0.10299656920464657}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912044164671721, "res": {"Yes": 0.9912044164671721, "No": 0.008795556748092162}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9362905855857488, "res": {"Yes": 0.9362905855857488, "No": 0.06370930829496675}, "ground_truth": 1}, {"key": "38944856", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969489981945879, "res": {"Yes": 0.9969489981945879, "No": 0.0030509883246007796}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.4453080228885127, "res": {"No": 0.5546918193008749, "Yes": 0.4453080228885127}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9195436082643229, "res": {"Yes": 0.9195436082643229, "No": 0.08045586546480414}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961866637037993, "res": {"Yes": 0.9961866637037993, "No": 0.0038132669121224484}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993030675571408, "res": {"Yes": 0.993030675571408, "No": 0.0069693208359330175}, "ground_truth": 1}, {"key": "35778898", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9659405819789085, "res": {"Yes": 0.9659405819789085, "No": 0.034059359805659724}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8828664284316469, "res": {"Yes": 0.8828664284316469, "No": 0.11713308640449148}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9171212690953787, "res": {"Yes": 0.9171212690953787, "No": 0.08287865739390834}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 3.108920494210339e-06}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999879060383785, "res": {"Yes": 0.999879060383785, "No": 0.00012091085734902192}, "ground_truth": 1}, {"key": "32530125", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999391335724361, "res": {"Yes": 0.9999391335724361, "No": 6.0810262020654016e-05}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998173332655999, "res": {"Yes": 0.9998173332655999, "No": 0.0001826510602357301}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.19204105532403676, "res": {"No": 0.8079587853437009, "Yes": 0.19204105532403676}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983132695314794, "res": {"Yes": 0.9983132695314794, "No": 0.0016866745676236432}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986572171407592, "res": {"Yes": 0.9986572171407592, "No": 0.0013427312408107479}, "ground_truth": 1}, {"key": "35010363", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996757918668189, "res": {"Yes": 0.9996757918668189, "No": 0.0003241186193722317}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997485860097954, "res": {"Yes": 0.9997485860097954, "No": 0.0002512919870288748}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9759699948726202, "res": {"Yes": 0.9759699948726202, "No": 0.02402992140361578}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918162923601177, "res": {"Yes": 0.9918162923601177, "No": 0.008183699246250066}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9707981226005191, "res": {"Yes": 0.9707981226005191, "No": 0.029201832932996434}, "ground_truth": 1}, {"key": "27514800", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981881799312278, "res": {"Yes": 0.9981881799312278, "No": 0.0018118178512625004}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952172970221086, "res": {"Yes": 0.9952172970221086, "No": 0.004782738445371978}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9625243205842031, "res": {"Yes": 0.9625243205842031, "No": 0.03747533858791006}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994778435111729, "res": {"Yes": 0.9994778435111729, "No": 0.0005220782902530646}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.823394643233241e-06}, "ground_truth": 1}, {"key": "25725840", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999917005724405, "res": {"Yes": 0.9999917005724405, "No": 8.219789150630915e-06}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998523247174145, "res": {"Yes": 0.998523247174145, "No": 0.0014767164069436729}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.992227490884428, "res": {"Yes": 0.992227490884428, "No": 0.007772402191719196}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991147809877264, "res": {"Yes": 0.9991147809877264, "No": 0.0008851181668545922}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990601567296046, "res": {"Yes": 0.9990601567296046, "No": 0.0009397969656684649}, "ground_truth": 1}, {"key": "38327225", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980879497016297, "res": {"Yes": 0.9980879497016297, "No": 0.0019120383634276654}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962920837617505, "res": {"Yes": 0.9962920837617505, "No": 0.0037079430132186905}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9200794720562335, "res": {"Yes": 0.9200794720562335, "No": 0.0799204591130945}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980676379202563, "res": {"Yes": 0.9980676379202563, "No": 0.001932354637991277}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988531749071008, "res": {"Yes": 0.9988531749071008, "No": 0.0011467585140620652}, "ground_truth": 1}, {"key": "11991724", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994229464594733, "res": {"Yes": 0.9994229464594733, "No": 0.0005770036301816322}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998201936371021, "res": {"Yes": 0.9998201936371021, "No": 0.0001797465059389805}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.28027761075766305, "res": {"No": 0.7197222250162497, "Yes": 0.28027761075766305}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997066487358987, "res": {"Yes": 0.9997066487358987, "No": 0.0002932940114561261}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998844239234294, "res": {"Yes": 0.9998844239234294, "No": 0.00011548117826802956}, "ground_truth": 1}, {"key": "32217545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999219694395426, "res": {"Yes": 0.9999219694395426, "No": 7.791095390639125e-05}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995876466410251, "res": {"Yes": 0.9995876466410251, "No": 0.0004122891641774533}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5916448143001519, "res": {"Yes": 0.5916448143001519, "No": 0.4083550346508125}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997623992715251, "res": {"Yes": 0.9997623992715251, "No": 0.00023757552144607495}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9466015888788895, "res": {"Yes": 0.9466015888788895, "No": 0.05339830900982817}, "ground_truth": 1}, {"key": "12731847", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963793005969825, "res": {"Yes": 0.9963793005969825, "No": 0.0036207213682479937}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9440670951420775, "res": {"Yes": 0.9440670951420775, "No": 0.0559328172733661}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.14123326413152149, "res": {"No": 0.8587665701237536, "Yes": 0.14123326413152149}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9084970302509141, "res": {"Yes": 0.9084970302509141, "No": 0.09150248944697519}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9285484394273238, "res": {"Yes": 0.9285484394273238, "No": 0.07145108423384372}, "ground_truth": 1}, {"key": "36827234", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9908838295528799, "res": {"Yes": 0.9908838295528799, "No": 0.009116050754371031}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.6275684815416224, "res": {"Yes": 0.6275684815416224, "No": 0.37243101982076143}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9729220802167889, "res": {"Yes": 0.9729220802167889, "No": 0.027077829108823063}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9947885251007937, "res": {"Yes": 0.9947885251007937, "No": 0.005211416391455167}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9573709098656713, "res": {"Yes": 0.9573709098656713, "No": 0.0426290002617028}, "ground_truth": 1}, {"key": "29111539", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9849363954915562, "res": {"Yes": 0.9849363954915562, "No": 0.015063570495740551}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958424143893421, "res": {"Yes": 0.9958424143893421, "No": 0.004157545096219648}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.44919252363035583, "res": {"No": 0.5508074425301848, "Yes": 0.44919252363035583}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999253899271669, "res": {"Yes": 0.999253899271669, "No": 0.0007461021774861829}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9892050660374316, "res": {"Yes": 0.9892050660374316, "No": 0.010794876020302832}, "ground_truth": 1}, {"key": "37763052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981415177208833, "res": {"Yes": 0.9981415177208833, "No": 0.001858443443410382}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9461143243195286, "res": {"Yes": 0.9461143243195286, "No": 0.05388553523877137}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.19918930998611115, "res": {"No": 0.800810579423568, "Yes": 0.19918930998611115}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993549667234665, "res": {"Yes": 0.9993549667234665, "No": 0.0006449456553432244}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998390246079291, "res": {"Yes": 0.9998390246079291, "No": 0.00016088173952438466}, "ground_truth": 1}, {"key": "30682335", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1909729260087439e-06}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998328270349577, "res": {"Yes": 0.9998328270349577, "No": 0.00016711990653247793}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9546152951154434, "res": {"Yes": 0.9546152951154434, "No": 0.04538463351776562}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999064742888021, "res": {"Yes": 0.9999064742888021, "No": 9.350580855652735e-05}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999714363229496, "res": {"Yes": 0.9999714363229496, "No": 2.852048377226942e-05}, "ground_truth": 1}, {"key": "12261276", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999737011318213, "res": {"Yes": 0.9999737011318213, "No": 2.6169629472642582e-05}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999723899261651, "res": {"Yes": 0.9999723899261651, "No": 2.7575523732114863e-05}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6500879744130177, "res": {"Yes": 0.6500879744130177, "No": 0.34991174970228645}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998636924207224, "res": {"Yes": 0.9998636924207224, "No": 0.00013623857243160397}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996018224437325, "res": {"Yes": 0.9996018224437325, "No": 0.00039817060051961867}, "ground_truth": 1}, {"key": "36912979", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992034360956046, "res": {"Yes": 0.9992034360956046, "No": 0.0007965178645915255}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999396103605277, "res": {"Yes": 0.9999396103605277, "No": 6.0298881700507334e-05}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9504570640842755, "res": {"Yes": 0.9504570640842755, "No": 0.049542455011611}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985528341558492, "res": {"Yes": 0.9985528341558492, "No": 0.001447186430947363}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997945734529232, "res": {"Yes": 0.9997945734529232, "No": 0.0002052953261809584}, "ground_truth": 1}, {"key": "30205259", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.989768105658292, "res": {"Yes": 0.989768105658292, "No": 0.010231784257542157}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9761576245942513, "res": {"Yes": 0.9761576245942513, "No": 0.023842315853429576}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8323837304537153, "res": {"Yes": 0.8323837304537153, "No": 0.1676158039056351}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 1.1510258151759262e-06, "res": {"No": 0.9999987335551019, "Yes": 1.1510258151759262e-06}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.994442402404055, "res": {"Yes": 0.994442402404055, "No": 0.005557558421446879}, "ground_truth": 1}, {"key": "39458032", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7084904848036137, "res": {"Yes": 0.7084904848036137, "No": 0.29150911627789533}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9380220713770743, "res": {"Yes": 0.9380220713770743, "No": 0.0619772126127443}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9781349771739558, "res": {"Yes": 0.9781349771739558, "No": 0.021864962700905557}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989479769646497, "res": {"Yes": 0.9989479769646497, "No": 0.0010519310590883402}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999272140090287, "res": {"Yes": 0.9999272140090287, "No": 7.274510059163165e-05}, "ground_truth": 1}, {"key": "35116452", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998859733865285, "res": {"Yes": 0.9998859733865285, "No": 0.0001139796083149477}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9762024869010654, "res": {"Yes": 0.9762024869010654, "No": 0.02379743445793943}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9161705240566831, "res": {"Yes": 0.9161705240566831, "No": 0.08382942495603496}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946989956755599, "res": {"Yes": 0.9946989956755599, "No": 0.005300927890862711}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9619027133540162, "res": {"Yes": 0.9619027133540162, "No": 0.03809710439000439}, "ground_truth": 1}, {"key": "40107476", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9325938678095038, "res": {"Yes": 0.9325938678095038, "No": 0.06740602994542386}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7733811909929547, "res": {"Yes": 0.7733811909929547, "No": 0.22661833956807734}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9710732294549956, "res": {"Yes": 0.9710732294549956, "No": 0.028926677869389527}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9750296774863417, "res": {"Yes": 0.9750296774863417, "No": 0.024970231590094632}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976933757556025, "res": {"Yes": 0.9976933757556025, "No": 0.0023065900820390638}, "ground_truth": 1}, {"key": "39501049", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9822685868442742, "res": {"Yes": 0.9822685868442742, "No": 0.017731421274189575}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8631148366063753, "res": {"Yes": 0.8631148366063753, "No": 0.13688483871114773}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.4624216693226493, "res": {"No": 0.5375779748572614, "Yes": 0.4624216693226493}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979228010698947, "res": {"Yes": 0.9979228010698947, "No": 0.002077221698532297}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981481623710835, "res": {"Yes": 0.9981481623710835, "No": 0.001851811369994921}, "ground_truth": 1}, {"key": "39642178", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.979365817297544, "res": {"Yes": 0.979365817297544, "No": 0.020634212083613695}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971700049072075, "res": {"Yes": 0.9971700049072075, "No": 0.0028299181217198675}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9938646098470172, "res": {"Yes": 0.9938646098470172, "No": 0.006135315965662348}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996513670066106, "res": {"Yes": 0.9996513670066106, "No": 0.0003485434703365705}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999830324176571, "res": {"Yes": 0.999830324176571, "No": 0.00016955961195896523}, "ground_truth": 1}, {"key": "38024796", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998815633641722, "res": {"Yes": 0.9998815633641722, "No": 0.00011838186671362455}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997569172194078, "res": {"Yes": 0.9997569172194078, "No": 0.00024303283507176446}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9883346934968086, "res": {"Yes": 0.9883346934968086, "No": 0.011665244658545161}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986371210641818, "res": {"Yes": 0.9986371210641818, "No": 0.001362871102098205}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9954454654930182, "res": {"Yes": 0.9954454654930182, "No": 0.004554575258175094}, "ground_truth": 1}, {"key": "36652079", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993597245138853, "res": {"Yes": 0.9993597245138853, "No": 0.0006401714671961851}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9598494526251126, "res": {"Yes": 0.9598494526251126, "No": 0.040150366916039605}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9279892502920905, "res": {"Yes": 0.9279892502920905, "No": 0.07201066830784644}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977134309907943, "res": {"Yes": 0.9977134309907943, "No": 0.0022865222131493836}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9936705926237739, "res": {"Yes": 0.9936705926237739, "No": 0.006329436926134012}, "ground_truth": 1}, {"key": "32193402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975136429188702, "res": {"Yes": 0.9975136429188702, "No": 0.002486379357473098}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995095209541931, "res": {"Yes": 0.9995095209541931, "No": 0.000490404498627684}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.34291941538073717, "res": {"No": 0.6570801329333018, "Yes": 0.34291941538073717}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977013220159073, "res": {"Yes": 0.9977013220159073, "No": 0.002298572502080754}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991578515062565, "res": {"Yes": 0.9991578515062565, "No": 0.0008420770833420737}, "ground_truth": 1}, {"key": "32589706", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986093060123676, "res": {"Yes": 0.9986093060123676, "No": 0.0013906402866818413}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975682026215948, "res": {"Yes": 0.9975682026215948, "No": 0.0024316870579197494}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.42872371007127796, "res": {"No": 0.5712760134742694, "Yes": 0.42872371007127796}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9500075566982275, "res": {"Yes": 0.9500075566982275, "No": 0.0499923942562316}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9478999483613869, "res": {"Yes": 0.9478999483613869, "No": 0.05209954830656675}, "ground_truth": 1}, {"key": "38590589", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9617518440128731, "res": {"Yes": 0.9617518440128731, "No": 0.038248092088226854}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9943147380176774, "res": {"Yes": 0.9943147380176774, "No": 0.005685194156182613}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8012612662529457, "res": {"Yes": 0.8012612662529457, "No": 0.19873806054136192}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9795284339239885, "res": {"Yes": 0.9795284339239885, "No": 0.02047125442588876}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9929481607325795, "res": {"Yes": 0.9929481607325795, "No": 0.007051640459042589}, "ground_truth": 1}, {"key": "37045414", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9821043731226954, "res": {"Yes": 0.9821043731226954, "No": 0.01789553569062228}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.611552116608035, "res": {"Yes": 0.611552116608035, "No": 0.38844748191432965}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0002690331370203353, "res": {"No": 0.999730832961949, "Yes": 0.0002690331370203353}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 5.100025118785594e-05, "res": {"No": 0.999948788531352, "Yes": 5.100025118785594e-05}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.20434803190887507, "res": {"No": 0.7956516536217558, "Yes": 0.20434803190887507}, "ground_truth": 1}, {"key": "33310095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3810925195005123, "res": {"No": 0.6189070370795752, "Yes": 0.3810925195005123}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.1283393694234637, "res": {"No": 0.8716604178425482, "Yes": 0.1283393694234637}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.12635586848072164, "res": {"No": 0.8736440689824696, "Yes": 0.12635586848072164}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994264014106056, "res": {"Yes": 0.9994264014106056, "No": 0.000573529605634081}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989206148556717, "res": {"Yes": 0.9989206148556717, "No": 0.0010792776545476375}, "ground_truth": 1}, {"key": "37934604", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999217310531738, "res": {"Yes": 0.9999217310531738, "No": 7.814761654706057e-05}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948528297124665, "res": {"Yes": 0.9948528297124665, "No": 0.005147145447584431}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6559410873104723, "res": {"Yes": 0.6559410873104723, "No": 0.3440583203765485}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9902608348599545, "res": {"Yes": 0.9902608348599545, "No": 0.009739034437194833}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9936395227193366, "res": {"Yes": 0.9936395227193366, "No": 0.006360463671268464}, "ground_truth": 1}, {"key": "39012181", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.986635084299852, "res": {"Yes": 0.986635084299852, "No": 0.013364777199171236}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.971187340176721, "res": {"Yes": 0.971187340176721, "No": 0.0288121242363159}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.04751193197562538, "res": {"No": 0.9524879099763998, "Yes": 0.04751193197562538}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962522115556255, "res": {"Yes": 0.9962522115556255, "No": 0.0037477823022657323}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9949318794813948, "res": {"Yes": 0.9949318794813948, "No": 0.005068134894335004}, "ground_truth": 1}, {"key": "40221674", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9899819839988958, "res": {"Yes": 0.9899819839988958, "No": 0.01001788283062517}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9689501434141825, "res": {"Yes": 0.9689501434141825, "No": 0.03104934083420913}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.029802526714142, "res": {"No": 0.9701973321195926, "Yes": 0.029802526714142}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9922764246827429, "res": {"Yes": 0.9922764246827429, "No": 0.007723399979854931}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979402475075608, "res": {"Yes": 0.9979402475075608, "No": 0.0020597725806863454}, "ground_truth": 1}, {"key": "36884862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997835313520421, "res": {"Yes": 0.997835313520421, "No": 0.0021645798522204143}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945677308274387, "res": {"Yes": 0.9945677308274387, "No": 0.005432072184837852}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9957186492255152, "res": {"Yes": 0.9957186492255152, "No": 0.004281302471123486}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969786136026182, "res": {"Yes": 0.9969786136026182, "No": 0.0030213470620752054}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9783330216246918, "res": {"Yes": 0.9783330216246918, "No": 0.021666941786429906}, "ground_truth": 1}, {"key": "39054429", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9814628541331366, "res": {"Yes": 0.9814628541331366, "No": 0.018537121310884568}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.997793061378004, "res": {"Yes": 0.997793061378004, "No": 0.002206969437030159}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.46686551464136083, "res": {"No": 0.5331344421198464, "Yes": 0.46686551464136083}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970390537736111, "res": {"Yes": 0.9970390537736111, "No": 0.0029609977196242815}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9904726960983514, "res": {"Yes": 0.9904726960983514, "No": 0.009527270630258926}, "ground_truth": 1}, {"key": "36753964", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.988824817489426, "res": {"Yes": 0.988824817489426, "No": 0.011175046247637684}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996967577872169, "res": {"Yes": 0.9996967577872169, "No": 0.0003031153899599473}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9367004203601921, "res": {"Yes": 0.9367004203601921, "No": 0.06329911063061393}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.994883981031973, "res": {"Yes": 0.994883981031973, "No": 0.0051160429503587955}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944103383190571, "res": {"Yes": 0.9944103383190571, "No": 0.005589633821240557}, "ground_truth": 1}, {"key": "37612459", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9955154012785201, "res": {"Yes": 0.9955154012785201, "No": 0.004484550551390592}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963390650226522, "res": {"Yes": 0.9963390650226522, "No": 0.0036609064437694502}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5751458374475267, "res": {"Yes": 0.5751458374475267, "No": 0.42485344704830336}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9749373231493267, "res": {"Yes": 0.9749373231493267, "No": 0.02506214847371871}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995704230328156, "res": {"Yes": 0.995704230328156, "No": 0.0042957689749599775}, "ground_truth": 1}, {"key": "36805789", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9915780638900425, "res": {"Yes": 0.9915780638900425, "No": 0.008421923774864448}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9944776460673103, "res": {"Yes": 0.9944776460673103, "No": 0.005521978825788989}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.4320670692046293, "res": {"No": 0.5679327870053025, "Yes": 0.4320670692046293}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992231900731734, "res": {"Yes": 0.9992231900731734, "No": 0.0007767264723589072}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994826054947233, "res": {"Yes": 0.9994826054947233, "No": 0.0005172971967355158}, "ground_truth": 1}, {"key": "12757394", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994616477336113, "res": {"Yes": 0.9994616477336113, "No": 0.0005382600232340079}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977435740649918, "res": {"Yes": 0.9977435740649918, "No": 0.0022564206599151315}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9700878350367281, "res": {"Yes": 0.9700878350367281, "No": 0.029912144813535227}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950298333566685, "res": {"Yes": 0.9950298333566685, "No": 0.0049701065523439685}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977237560812031, "res": {"Yes": 0.9977237560812031, "No": 0.002276207268183347}, "ground_truth": 1}, {"key": "32192542", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981457828885155, "res": {"Yes": 0.9981457828885155, "No": 0.001854188768757815}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988348606026137, "res": {"Yes": 0.9988348606026137, "No": 0.0011650491546755706}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.2818979931221612, "res": {"No": 0.7181019201056449, "Yes": 0.2818979931221612}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995292809529897, "res": {"Yes": 0.9995292809529897, "No": 0.0004706527485903981}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999977753956447, "res": {"Yes": 0.999977753956447, "No": 2.2227617738292803e-05}, "ground_truth": 1}, {"key": "34856060", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998292515191815, "res": {"Yes": 0.9998292515191815, "No": 0.00017065339300139303}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9715104788154629, "res": {"Yes": 0.9715104788154629, "No": 0.028489405433360586}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9952952297675689, "res": {"Yes": 0.9952952297675689, "No": 0.004704701269954146}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.978672595364927, "res": {"Yes": 0.978672595364927, "No": 0.02132736476662057}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980032918791608, "res": {"Yes": 0.9980032918791608, "No": 0.0019966400223718115}, "ground_truth": 1}, {"key": "36083416", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993671104591267, "res": {"Yes": 0.9993671104591267, "No": 0.0006328867890991114}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982435419810781, "res": {"Yes": 0.9982435419810781, "No": 0.0017563989800179123}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7559241488841765, "res": {"Yes": 0.7559241488841765, "No": 0.2440756892371415}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9875801282175004, "res": {"Yes": 0.9875801282175004, "No": 0.012419810006215129}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995835953604866, "res": {"Yes": 0.9995835953604866, "No": 0.00041628751275396634}, "ground_truth": 1}, {"key": "33839050", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989557096510783, "res": {"Yes": 0.9989557096510783, "No": 0.0010443104164001298}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993459168057359, "res": {"Yes": 0.9993459168057359, "No": 0.0006540125611762549}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.34271842065486097, "res": {"No": 0.6572814405503224, "Yes": 0.34271842065486097}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.99864912944817, "res": {"Yes": 0.99864912944817, "No": 0.0013508067127536052}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982642068345314, "res": {"Yes": 0.9982642068345314, "No": 0.0017358136725236662}, "ground_truth": 1}, {"key": "18464690", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996694761550452, "res": {"Yes": 0.9996694761550452, "No": 0.0003303968666061437}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971298306344609, "res": {"Yes": 0.9971298306344609, "No": 0.002870104037120064}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9234135915408261, "res": {"Yes": 0.9234135915408261, "No": 0.07658635126736923}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946673791731087, "res": {"Yes": 0.9946673791731087, "No": 0.005332601892350541}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979654155785852, "res": {"Yes": 0.9979654155785852, "No": 0.0020346082189961726}, "ground_truth": 1}, {"key": "39212665", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947960740821592, "res": {"Yes": 0.9947960740821592, "No": 0.005203946494474157}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997951693506625, "res": {"Yes": 0.9997951693506625, "No": 0.0002047853910718906}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9358365217581642, "res": {"Yes": 0.9358365217581642, "No": 0.06416339514342295}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.31111109871490816, "res": {"No": 0.6888886031965121, "Yes": 0.31111109871490816}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9752647874332434, "res": {"Yes": 0.9752647874332434, "No": 0.024735040424504504}, "ground_truth": 1}, {"key": "40094011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9881353803258297, "res": {"Yes": 0.9881353803258297, "No": 0.011864194264464598}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9553587830474004, "res": {"Yes": 0.9553587830474004, "No": 0.044641154445908854}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.709792915358119, "res": {"Yes": 0.709792915358119, "No": 0.2902070341072487}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.8007035021922307, "res": {"Yes": 0.8007035021922307, "No": 0.19929642294854422}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938198604108562, "res": {"Yes": 0.9938198604108562, "No": 0.006180090275086828}, "ground_truth": 1}, {"key": "36036272", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9936269227545292, "res": {"Yes": 0.9936269227545292, "No": 0.006373012507980596}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9928559012688274, "res": {"Yes": 0.9928559012688274, "No": 0.0071440131121663485}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.19076167593928317, "res": {"No": 0.8092381091101142, "Yes": 0.19076167593928317}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985728060113879, "res": {"Yes": 0.9985728060113879, "No": 0.001427149757933272}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989830689252257, "res": {"Yes": 0.9989830689252257, "No": 0.0010168638957764487}, "ground_truth": 1}, {"key": "30681904", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984581173243992, "res": {"Yes": 0.9984581173243992, "No": 0.0015418159021133293}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993330586343296, "res": {"Yes": 0.9993330586343296, "No": 0.0006669309771080303}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.16515343963887144, "res": {"No": 0.8348463859314623, "Yes": 0.16515343963887144}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988076246450894, "res": {"Yes": 0.9988076246450894, "No": 0.0011923632700673254}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994901078392137, "res": {"Yes": 0.9994901078392137, "No": 0.0005098872169178606}, "ground_truth": 1}, {"key": "27834240", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997764620483319, "res": {"Yes": 0.9997764620483319, "No": 0.00022341556511715219}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989472624174173, "res": {"Yes": 0.9989472624174173, "No": 0.0010527010387069659}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.867315994279792, "res": {"Yes": 0.867315994279792, "No": 0.13268390946096356}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9831392378860804, "res": {"Yes": 0.9831392378860804, "No": 0.01686069677764365}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974337038825162, "res": {"Yes": 0.9974337038825162, "No": 0.0025663038652061925}, "ground_truth": 1}, {"key": "35025075", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948404365100193, "res": {"Yes": 0.9948404365100193, "No": 0.005159506208989918}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9861091260465331, "res": {"Yes": 0.9861091260465331, "No": 0.013890821231854925}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9978832701390474, "res": {"Yes": 0.9978832701390474, "No": 0.002116733881945482}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984796308339937, "res": {"Yes": 0.9984796308339937, "No": 0.0015203381526770502}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996711444548425, "res": {"Yes": 0.9996711444548425, "No": 0.0003288157481815282}, "ground_truth": 1}, {"key": "33316985", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998679831849867, "res": {"Yes": 0.9998679831849867, "No": 0.00013193225326076059}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992075976167174, "res": {"Yes": 0.9992075976167174, "No": 0.0007923059026515028}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.42770570288258486, "res": {"No": 0.572294173767897, "Yes": 0.42770570288258486}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996893731045271, "res": {"Yes": 0.9996893731045271, "No": 0.0003105629312479258}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998309941260418, "res": {"Yes": 0.998309941260418, "No": 0.0016899921415979623}, "ground_truth": 1}, {"key": "17037056", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996572059673049, "res": {"Yes": 0.9996572059673049, "No": 0.0003427375033989317}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987534035476998, "res": {"Yes": 0.9987534035476998, "No": 0.0012465090433911297}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.023812143436561944, "res": {"No": 0.9761876077361544, "Yes": 0.023812143436561944}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9611401461323998, "res": {"Yes": 0.9611401461323998, "No": 0.038859488412743795}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9784061655138426, "res": {"Yes": 0.9784061655138426, "No": 0.021593638965529464}, "ground_truth": 1}, {"key": "34050457", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9580543814662433, "res": {"Yes": 0.9580543814662433, "No": 0.041945542884388495}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.3469637400598738, "res": {"No": 0.6530360006901706, "Yes": 0.3469637400598738}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6597759772431501, "res": {"Yes": 0.6597759772431501, "No": 0.34022395058977095}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969135716432233, "res": {"Yes": 0.9969135716432233, "No": 0.0030864296783873466}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998483173649743, "res": {"Yes": 0.9998483173649743, "No": 0.00015161684138440316}, "ground_truth": 1}, {"key": "34713745", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9829028567141014, "res": {"Yes": 0.9829028567141014, "No": 0.017097107189313548}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980505535438797, "res": {"Yes": 0.9980505535438797, "No": 0.0019494538939766092}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.08297787275841094, "res": {"No": 0.9170218126868666, "Yes": 0.08297787275841094}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9819833116681773, "res": {"Yes": 0.9819833116681773, "No": 0.018016692314581542}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938317568039329, "res": {"Yes": 0.9938317568039329, "No": 0.0061681519981766145}, "ground_truth": 1}, {"key": "40856210", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9769385309885249, "res": {"Yes": 0.9769385309885249, "No": 0.02306144277422912}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.985586494122898, "res": {"Yes": 0.985586494122898, "No": 0.014413456926455997}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.040543840858763475, "res": {"No": 0.959455886595337, "Yes": 0.040543840858763475}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998866885302296, "res": {"Yes": 0.9998866885302296, "No": 0.00011317729347412492}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960390528275528, "res": {"Yes": 0.9960390528275528, "No": 0.003960892105185594}, "ground_truth": 1}, {"key": "40848302", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970337163387311, "res": {"Yes": 0.9970337163387311, "No": 0.0029663186701180295}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980694225665953, "res": {"Yes": 0.9980694225665953, "No": 0.0019305236338275622}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.37941051767575823, "res": {"No": 0.6205889701542694, "Yes": 0.37941051767575823}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997588240176042, "res": {"Yes": 0.9997588240176042, "No": 0.00024112260967664023}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999836879295629, "res": {"Yes": 0.999836879295629, "No": 0.00016307619702089914}, "ground_truth": 1}, {"key": "40636168", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997923090607499, "res": {"Yes": 0.9997923090607499, "No": 0.0002076623737935022}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995520310980441, "res": {"Yes": 0.9995520310980441, "No": 0.00044794997263952226}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.09623197462269781, "res": {"No": 0.9037679138664451, "Yes": 0.09623197462269781}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990348125933007, "res": {"Yes": 0.9990348125933007, "No": 0.0009650917340032964}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988824364398056, "res": {"Yes": 0.9988824364398056, "No": 0.0011174991411753744}, "ground_truth": 1}, {"key": "34423311", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998324552831309, "res": {"Yes": 0.998324552831309, "No": 0.0016753687310359438}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989059834729052, "res": {"Yes": 0.9989059834729052, "No": 0.0010940308238649172}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.13448875357218273, "res": {"No": 0.8655107793207878, "Yes": 0.13448875357218273}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9543638749242862, "res": {"Yes": 0.9543638749242862, "No": 0.04563565317903625}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7710647400558256, "res": {"Yes": 0.7710647400558256, "No": 0.22893450973934684}, "ground_truth": 1}, {"key": "34833945", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6614570868620451, "res": {"Yes": 0.6614570868620451, "No": 0.3385423106182655}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.6730074766256066, "res": {"Yes": 0.6730074766256066, "No": 0.32699195854263474}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5837613890730338, "res": {"Yes": 0.5837613890730338, "No": 0.4162384857216715}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986152468566614, "res": {"Yes": 0.9986152468566614, "No": 0.0013847622703948208}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997830168142573, "res": {"Yes": 0.9997830168142573, "No": 0.0002169464519679483}, "ground_truth": 1}, {"key": "21272328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999151753838112, "res": {"Yes": 0.9999151753838112, "No": 8.478066908977299e-05}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970769770746348, "res": {"Yes": 0.9970769770746348, "No": 0.0029230753934817045}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.33302766978707543, "res": {"No": 0.6669722087343568, "Yes": 0.33302766978707543}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.988701857847202, "res": {"Yes": 0.988701857847202, "No": 0.011297995029774388}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961645454176169, "res": {"Yes": 0.9961645454176169, "No": 0.003835457712176885}, "ground_truth": 1}, {"key": "38648957", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986717149525454, "res": {"Yes": 0.9986717149525454, "No": 0.001328196462819055}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990093609072068, "res": {"Yes": 0.9990093609072068, "No": 0.0009905452587872952}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.981916406514554, "res": {"Yes": 0.981916406514554, "No": 0.01808362945449453}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9940244238994685, "res": {"Yes": 0.9940244238994685, "No": 0.005975592636846981}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993157967738484, "res": {"Yes": 0.9993157967738484, "No": 0.0006841039350895055}, "ground_truth": 1}, {"key": "24942981", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988565087849064, "res": {"Yes": 0.9988565087849064, "No": 0.0011434426486256503}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.991455237782044, "res": {"Yes": 0.991455237782044, "No": 0.008544749314009887}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.1392687164820541, "res": {"No": 0.8607311125664124, "Yes": 0.1392687164820541}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983297853635005, "res": {"Yes": 0.9983297853635005, "No": 0.0016702339041433552}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972195571113773, "res": {"Yes": 0.9972195571113773, "No": 0.0027804068863307063}, "ground_truth": 1}, {"key": "35882366", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957820043379105, "res": {"Yes": 0.9957820043379105, "No": 0.004218027825923771}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.967775744935407, "res": {"Yes": 0.967775744935407, "No": 0.0322241985792713}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.043652976887309146, "res": {"No": 0.9563469321733594, "Yes": 0.043652976887309146}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9951958082236633, "res": {"Yes": 0.9951958082236633, "No": 0.004804230956719729}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999937345628867, "res": {"Yes": 0.999937345628867, "No": 6.254235817360687e-05}, "ground_truth": 1}, {"key": "40559523", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999342227627159, "res": {"Yes": 0.999342227627159, "No": 0.0006576675483416153}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990859743761339, "res": {"Yes": 0.9990859743761339, "No": 0.000914000261870291}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.025195980529104486, "res": {"No": 0.9748038555668873, "Yes": 0.025195980529104486}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936130369153112, "res": {"Yes": 0.9936130369153112, "No": 0.006386965538412919}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995455969923327, "res": {"Yes": 0.9995455969923327, "No": 0.0004543198920513532}, "ground_truth": 1}, {"key": "24632722", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999727376998406, "res": {"Yes": 0.999727376998406, "No": 0.00027257684053396525}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999763235462916, "res": {"Yes": 0.9999763235462916, "No": 2.3642538254937537e-05}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9857296417366809, "res": {"Yes": 0.9857296417366809, "No": 0.01427028322654136}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971913500581986, "res": {"Yes": 0.9971913500581986, "No": 0.0028085987092525406}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997560740540078, "res": {"Yes": 0.997560740540078, "No": 0.002439296502247239}, "ground_truth": 1}, {"key": "36002759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981911546362553, "res": {"Yes": 0.9981911546362553, "No": 0.0018088463728009572}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948028004185541, "res": {"Yes": 0.9948028004185541, "No": 0.005197118196485525}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8539278615622895, "res": {"Yes": 0.8539278615622895, "No": 0.14607200411159166}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946992254510544, "res": {"Yes": 0.9946992254510544, "No": 0.005300789008371336}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9844234265035232, "res": {"Yes": 0.9844234265035232, "No": 0.015576545339515385}, "ground_truth": 1}, {"key": "29508534", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999783493530909, "res": {"Yes": 0.999783493530909, "No": 0.0002164854972637253}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999435805817427, "res": {"Yes": 0.999435805817427, "No": 0.000564184518223852}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9781864225577928, "res": {"Yes": 0.9781864225577928, "No": 0.02181354438611454}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999177976487984, "res": {"Yes": 0.9999177976487984, "No": 8.212040779362059e-05}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999214926618624, "res": {"Yes": 0.9999214926618624, "No": 7.843165102689214e-05}, "ground_truth": 1}, {"key": "15631612", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999197047538358, "res": {"Yes": 0.9999197047538358, "No": 8.023907036966725e-05}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996937822942151, "res": {"Yes": 0.9996937822942151, "No": 0.0003062012476881417}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.28854947998291897, "res": {"No": 0.7114503614098879, "Yes": 0.28854947998291897}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999846533247227, "res": {"Yes": 0.999846533247227, "No": 0.00015340900009338428}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998675874229165, "res": {"Yes": 0.998675874229165, "No": 0.0013241242254501478}, "ground_truth": 1}, {"key": "40731892", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980444969922774, "res": {"Yes": 0.9980444969922774, "No": 0.001955520266454722}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992706762459522, "res": {"Yes": 0.9992706762459522, "No": 0.0007292954695835732}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9814147440032159, "res": {"Yes": 0.9814147440032159, "No": 0.01858528017531569}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9866006394970609, "res": {"Yes": 0.9866006394970609, "No": 0.013399278724551211}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990780059979048, "res": {"Yes": 0.9990780059979048, "No": 0.0009219210197068763}, "ground_truth": 1}, {"key": "35971910", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.969441824279811, "res": {"Yes": 0.969441824279811, "No": 0.030558053838709066}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9841264028490503, "res": {"Yes": 0.9841264028490503, "No": 0.015873582968725833}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 2.1891568396526136e-05, "res": {"No": 0.9999779923581718, "Yes": 2.1891568396526136e-05}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.98903466640202, "res": {"Yes": 0.98903466640202, "No": 0.010965200629671676}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988687581370392, "res": {"Yes": 0.9988687581370392, "No": 0.001131010271616904}, "ground_truth": 1}, {"key": "34428424", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9777878126179502, "res": {"Yes": 0.9777878126179502, "No": 0.022212162050519995}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.11098949741824535, "res": {"No": 0.8890102888727975, "Yes": 0.11098949741824535}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9768899441589293, "res": {"Yes": 0.9768899441589293, "No": 0.023109613019261108}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9338855404135317, "res": {"Yes": 0.9338855404135317, "No": 0.06611408953484212}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9921019249995702, "res": {"Yes": 0.9921019249995702, "No": 0.007897959847599494}, "ground_truth": 1}, {"key": "36971005", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988518688075432, "res": {"Yes": 0.9988518688075432, "No": 0.0011480789634008698}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.0297235327033631, "res": {"No": 0.9702759911461813, "Yes": 0.0297235327033631}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9959172521694902, "res": {"Yes": 0.9959172521694902, "No": 0.004082774488438239}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994234229844478, "res": {"Yes": 0.9994234229844478, "No": 0.0005765374265889212}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997338122342516, "res": {"Yes": 0.9997338122342516, "No": 0.0002661456081208935}, "ground_truth": 1}, {"key": "34649067", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995234463877612, "res": {"Yes": 0.9995234463877612, "No": 0.0004764575147096947}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998905026252752, "res": {"Yes": 0.9998905026252752, "No": 0.00010947788418717122}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.22447343689067484, "res": {"No": 0.7755260854401862, "Yes": 0.22447343689067484}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991634420498737, "res": {"Yes": 0.9991634420498737, "No": 0.0008364408929383809}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997333355612835, "res": {"Yes": 0.9997333355612835, "No": 0.0002665965742705727}, "ground_truth": 1}, {"key": "37355154", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990055537896876, "res": {"Yes": 0.9990055537896876, "No": 0.0009944447666759139}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923608225724139, "res": {"Yes": 0.9923608225724139, "No": 0.007639136543936963}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.28552054243147756, "res": {"No": 0.7144793381106833, "Yes": 0.28552054243147756}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982594616967285, "res": {"Yes": 0.9982594616967285, "No": 0.0017405597884424771}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983743436131649, "res": {"Yes": 0.9983743436131649, "No": 0.001625589922254267}, "ground_truth": 1}, {"key": "38674697", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998828744557322, "res": {"Yes": 0.9998828744557322, "No": 0.00011709511555179887}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9836848712662077, "res": {"Yes": 0.9836848712662077, "No": 0.016315136213473372}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.663183867963015, "res": {"Yes": 0.663183867963015, "No": 0.33681552569742185}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9862801302222339, "res": {"Yes": 0.9862801302222339, "No": 0.013719794979241785}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964415611034518, "res": {"Yes": 0.9964415611034518, "No": 0.0035583842482366217}, "ground_truth": 1}, {"key": "40525767", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961903374477517, "res": {"Yes": 0.9961903374477517, "No": 0.003809702030956093}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994330693159652, "res": {"Yes": 0.9994330693159652, "No": 0.0005668516095732382}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9316062240142609, "res": {"Yes": 0.9316062240142609, "No": 0.06839369752816558}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982178869526024, "res": {"Yes": 0.9982178869526024, "No": 0.001782026640980672}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937821924699357, "res": {"Yes": 0.9937821924699357, "No": 0.006217738646447516}, "ground_truth": 1}, {"key": "27165110", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974225531347537, "res": {"Yes": 0.9974225531347537, "No": 0.0025774699518687173}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9663023133026225, "res": {"Yes": 0.9663023133026225, "No": 0.033697649224716845}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9988000118622672, "res": {"Yes": 0.9988000118622672, "No": 0.001199937268265572}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9940207761347333, "res": {"Yes": 0.9940207761347333, "No": 0.005979197005815033}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938102123551016, "res": {"Yes": 0.9938102123551016, "No": 0.006189728836187644}, "ground_truth": 1}, {"key": "35497491", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983383427835973, "res": {"Yes": 0.9983383427835973, "No": 0.0016616121054686768}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996725744354539, "res": {"Yes": 0.9996725744354539, "No": 0.00032740585822952885}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.4774120139591197, "res": {"No": 0.5225872622482899, "Yes": 0.4774120139591197}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9935654893966445, "res": {"Yes": 0.9935654893966445, "No": 0.006434368358107736}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9872391174209626, "res": {"Yes": 0.9872391174209626, "No": 0.01276075798010788}, "ground_truth": 1}, {"key": "40690716", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972177774749437, "res": {"Yes": 0.9972177774749437, "No": 0.00278206728482148}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8568224997336973, "res": {"Yes": 0.8568224997336973, "No": 0.1431768356158067}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.049197818298180235, "res": {"No": 0.9508017908297767, "Yes": 0.049197818298180235}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.98877248828713, "res": {"Yes": 0.98877248828713, "No": 0.011227437721256962}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9767408254762033, "res": {"Yes": 0.9767408254762033, "No": 0.023259077338099298}, "ground_truth": 1}, {"key": "34835193", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.984879736685254, "res": {"Yes": 0.984879736685254, "No": 0.015120270774873485}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9467729610489973, "res": {"Yes": 0.9467729610489973, "No": 0.05322676008418432}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0003315214647690619, "res": {"No": 0.999668403660223, "Yes": 0.0003315214647690619}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924019194666973, "res": {"Yes": 0.9924019194666973, "No": 0.007597978909627783}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9818373438248543, "res": {"Yes": 0.9818373438248543, "No": 0.018162709779643665}, "ground_truth": 1}, {"key": "39471712", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990788396888856, "res": {"Yes": 0.9990788396888856, "No": 0.0009210533474389315}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982621948350502, "res": {"Yes": 0.9982621948350502, "No": 0.001737804055263854}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9845842619426847, "res": {"Yes": 0.9845842619426847, "No": 0.015415689337895284}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918870153504141, "res": {"Yes": 0.9918870153504141, "No": 0.008112910478156907}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938602477848171, "res": {"Yes": 0.9938602477848171, "No": 0.006139757700874725}, "ground_truth": 1}, {"key": "39115192", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9905211195111705, "res": {"Yes": 0.9905211195111705, "No": 0.00947879955455234}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994236612869971, "res": {"Yes": 0.9994236612869971, "No": 0.0005762957927496865}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5548660753667999, "res": {"Yes": 0.5548660753667999, "No": 0.4451336744064104}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987612538801792, "res": {"Yes": 0.9987612538801792, "No": 0.0012387214747899302}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989413121058758, "res": {"Yes": 0.9989413121058758, "No": 0.0010586588582510547}, "ground_truth": 1}, {"key": "23520673", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990130488569707, "res": {"Yes": 0.9990130488569707, "No": 0.000986901816673684}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9860571910187516, "res": {"Yes": 0.9860571910187516, "No": 0.013942748871671273}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9960960640491323, "res": {"Yes": 0.9960960640491323, "No": 0.0039039047842848985}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977978117822981, "res": {"Yes": 0.9977978117822981, "No": 0.002202135736964408}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987327137862458, "res": {"Yes": 0.9987327137862458, "No": 0.001267219181630067}, "ground_truth": 1}, {"key": "35764233", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998552484462708, "res": {"Yes": 0.998552484462708, "No": 0.0014475168613267417}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990507596139651, "res": {"Yes": 0.9990507596139651, "No": 0.000949242272047062}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9993034155276903, "res": {"Yes": 0.9993034155276903, "No": 0.0006965669123083897}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.969388163282259, "res": {"Yes": 0.969388163282259, "No": 0.03061165684245836}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972031966620862, "res": {"Yes": 0.9972031966620862, "No": 0.002796795448220257}, "ground_truth": 1}, {"key": "35228910", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9905867367750085, "res": {"Yes": 0.9905867367750085, "No": 0.009413155940975417}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9950517899272172, "res": {"Yes": 0.9950517899272172, "No": 0.00494821338438899}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.3318448296149179, "res": {"No": 0.6681545566348857, "Yes": 0.3318448296149179}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918987375404279, "res": {"Yes": 0.9918987375404279, "No": 0.008101006222654163}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9096165426324957, "res": {"Yes": 0.9096165426324957, "No": 0.09038295321465782}, "ground_truth": 1}, {"key": "36795599", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978131149251281, "res": {"Yes": 0.9978131149251281, "No": 0.0021867912330126656}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.987456671629858, "res": {"Yes": 0.987456671629858, "No": 0.01254313217807284}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.012078607092935711, "res": {"No": 0.9879212582874067, "Yes": 0.012078607092935711}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999925349918634, "res": {"Yes": 0.9999925349918634, "No": 7.374024025654362e-06}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989697464759552, "res": {"Yes": 0.9989697464759552, "No": 0.0010301677767059008}, "ground_truth": 1}, {"key": "38641949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976186289735658, "res": {"Yes": 0.9976186289735658, "No": 0.002381311244368878}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998335421456462, "res": {"Yes": 0.9998335421456462, "No": 0.0001664352854622891}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7036166024250168, "res": {"Yes": 0.7036166024250168, "No": 0.29638268646572996}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959850127007104, "res": {"Yes": 0.9959850127007104, "No": 0.004014816645494185}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9852413144753045, "res": {"Yes": 0.9852413144753045, "No": 0.014758139764027508}, "ground_truth": 1}, {"key": "29968443", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993850879359619, "res": {"Yes": 0.9993850879359619, "No": 0.000614843373395036}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9751945006823672, "res": {"Yes": 0.9751945006823672, "No": 0.024805070278347996}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.961512950485815, "res": {"Yes": 0.961512950485815, "No": 0.03848693291901959}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977009725211954, "res": {"Yes": 0.9977009725211954, "No": 0.002299052953939159}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981986399999707, "res": {"Yes": 0.9981986399999707, "No": 0.0018013137845636097}, "ground_truth": 1}, {"key": "21268042", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990907381892707, "res": {"Yes": 0.9990907381892707, "No": 0.0009091952412468208}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994834394633573, "res": {"Yes": 0.9994834394633573, "No": 0.0005164515904162311}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8779756493123644, "res": {"Yes": 0.8779756493123644, "No": 0.12202411376113746}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9921187045613237, "res": {"Yes": 0.9921187045613237, "No": 0.007881267797861137}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9919449531630381, "res": {"Yes": 0.9919449531630381, "No": 0.008055039166350243}, "ground_truth": 1}, {"key": "26808572", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996937822942151, "res": {"Yes": 0.9996937822942151, "No": 0.0003062119649193608}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997316671676959, "res": {"Yes": 0.9997316671676959, "No": 0.00026825916799030626}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.11809952237844452, "res": {"No": 0.8819003246168686, "Yes": 0.11809952237844452}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.5816324557924134, "res": {"Yes": 0.5816324557924134, "No": 0.41836719714917814}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9827812585120466, "res": {"Yes": 0.9827812585120466, "No": 0.017218746084955164}, "ground_truth": 1}, {"key": "37829390", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.970141236931577, "res": {"Yes": 0.970141236931577, "No": 0.029858718641464572}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9754884383634712, "res": {"Yes": 0.9754884383634712, "No": 0.02451149715186989}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7512232508171858, "res": {"Yes": 0.7512232508171858, "No": 0.24877639450742212}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976236127893994, "res": {"Yes": 0.9976236127893994, "No": 0.00237635851194162}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9954494851099234, "res": {"Yes": 0.9954494851099234, "No": 0.004550528040025594}, "ground_truth": 1}, {"key": "35716045", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967484436963705, "res": {"Yes": 0.9967484436963705, "No": 0.003251506992862472}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961993333859739, "res": {"Yes": 0.9961993333859739, "No": 0.00380070767243582}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.31387875443964414, "res": {"No": 0.6861210562417376, "Yes": 0.31387875443964414}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.036353048984209824, "res": {"No": 0.963646851603592, "Yes": 0.036353048984209824}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.26347125661651166, "res": {"No": 0.736528520100206, "Yes": 0.26347125661651166}, "ground_truth": 1}, {"key": "34367070", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.950023041947606, "res": {"Yes": 0.950023041947606, "No": 0.0499768040598806}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.12617697424485966, "res": {"No": 0.8738228960137082, "Yes": 0.12617697424485966}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5623255085780797, "res": {"Yes": 0.5623255085780797, "No": 0.4376739947330822}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9692415466562055, "res": {"Yes": 0.9692415466562055, "No": 0.030758370819588467}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9456655061843916, "res": {"Yes": 0.9456655061843916, "No": 0.05433398896911837}, "ground_truth": 1}, {"key": "35239748", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9774742521314695, "res": {"Yes": 0.9774742521314695, "No": 0.022525654934882466}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8910857642239949, "res": {"Yes": 0.8910857642239949, "No": 0.10891392365085067}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9929501621209093, "res": {"Yes": 0.9929501621209093, "No": 0.007049772021899794}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990715786000898, "res": {"Yes": 0.9990715786000898, "No": 0.0009284264445174251}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998794179605985, "res": {"Yes": 0.9998794179605985, "No": 0.00012055445480709628}, "ground_truth": 1}, {"key": "40421370", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999669118653311, "res": {"Yes": 0.999669118653311, "No": 0.0003308231881773912}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991197721280373, "res": {"Yes": 0.9991197721280373, "No": 0.0008801897928967096}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0033415941347663685, "res": {"No": 0.9966583172916024, "Yes": 0.0033415941347663685}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9947733122146393, "res": {"Yes": 0.9947733122146393, "No": 0.005226666419591605}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993675833097869, "res": {"Yes": 0.9993675833097869, "No": 0.0006324140094777252}, "ground_truth": 1}, {"key": "37288396", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998020817783959, "res": {"Yes": 0.9998020817783959, "No": 0.00019787065466583557}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9750421501457511, "res": {"Yes": 0.9750421501457511, "No": 0.024957792023268754}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9490272341969996, "res": {"Yes": 0.9490272341969996, "No": 0.05097224551396289}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9155386805708806, "res": {"Yes": 0.9155386805708806, "No": 0.08446083864773529}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9796801227073697, "res": {"Yes": 0.9796801227073697, "No": 0.020319816232275086}, "ground_truth": 1}, {"key": "38903688", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987001342582942, "res": {"Yes": 0.9987001342582942, "No": 0.0012998479277643348}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9799912126905657, "res": {"Yes": 0.9799912126905657, "No": 0.02000877403259142}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0055353194496852885, "res": {"No": 0.9944645647943896, "Yes": 0.0055353194496852885}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9861844794959899, "res": {"Yes": 0.9861844794959899, "No": 0.013815045874258521}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931025056667827, "res": {"Yes": 0.9931025056667827, "No": 0.006897460352406817}, "ground_truth": 1}, {"key": "28071228", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9777120135337269, "res": {"Yes": 0.9777120135337269, "No": 0.022287506592484747}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9781401202211869, "res": {"Yes": 0.9781401202211869, "No": 0.021859573651827363}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9974863639870275, "res": {"Yes": 0.9974863639870275, "No": 0.0025136050032325695}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991596380520501, "res": {"Yes": 0.9991596380520501, "No": 0.0008402817547894739}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978597655748069, "res": {"Yes": 0.9978597655748069, "No": 0.002140182014629584}, "ground_truth": 1}, {"key": "36855834", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988919511399925, "res": {"Yes": 0.9988919511399925, "No": 0.0011080140613372296}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923675249008069, "res": {"Yes": 0.9923675249008069, "No": 0.0076324300359045475}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.29930753171692287, "res": {"No": 0.7006923929657707, "Yes": 0.29930753171692287}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961645454176169, "res": {"Yes": 0.9961645454176169, "No": 0.0038354427539209752}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9793381427692941, "res": {"Yes": 0.9793381427692941, "No": 0.02066182467447686}, "ground_truth": 1}, {"key": "40548717", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993735397082391, "res": {"Yes": 0.9993735397082391, "No": 0.0006263676869168732}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.31816893344820885, "res": {"No": 0.6818309948404646, "Yes": 0.31816893344820885}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8197776696997054, "res": {"Yes": 0.8197776696997054, "No": 0.18022213601168094}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998721547728976, "res": {"Yes": 0.9998721547728976, "No": 0.00012783272303609528}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999317434295857, "res": {"Yes": 0.9999317434295857, "No": 6.819965853128814e-05}, "ground_truth": 1}, {"key": "37051175", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999909853566321, "res": {"Yes": 0.9999909853566321, "No": 8.946215180310909e-06}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999034336203664, "res": {"Yes": 0.999034336203664, "No": 0.0009656143761345449}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9810194567748017, "res": {"Yes": 0.9810194567748017, "No": 0.018980523585712474}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989589210990557, "res": {"Yes": 0.9989589210990557, "No": 0.0010409890584701282}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983692371411315, "res": {"Yes": 0.9983692371411315, "No": 0.0016307052321045208}, "ground_truth": 1}, {"key": "38882119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9776643874084268, "res": {"Yes": 0.9776643874084268, "No": 0.022335614760977212}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974469803121895, "res": {"Yes": 0.9974469803121895, "No": 0.0025529416409323094}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9161843253534085, "res": {"Yes": 0.9161843253534085, "No": 0.08381565291291902}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965857482583179, "res": {"Yes": 0.9965857482583179, "No": 0.0034143060945593467}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996430551481466, "res": {"Yes": 0.996430551481466, "No": 0.0035694527009465494}, "ground_truth": 1}, {"key": "19485402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983518876312787, "res": {"Yes": 0.9983518876312787, "No": 0.0016480813266781412}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9642130910425035, "res": {"Yes": 0.9642130910425035, "No": 0.035786852558372564}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9669744285345043, "res": {"Yes": 0.9669744285345043, "No": 0.03302512313637759}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983768428467874, "res": {"Yes": 0.9983768428467874, "No": 0.001623158559344576}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9902117330035896, "res": {"Yes": 0.9902117330035896, "No": 0.009788050245864923}, "ground_truth": 1}, {"key": "36060907", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991415527768257, "res": {"Yes": 0.9991415527768257, "No": 0.0008583342204509449}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9889131699673499, "res": {"Yes": 0.9889131699673499, "No": 0.011086574579925963}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.004625509740170298, "res": {"No": 0.9953744813147298, "Yes": 0.004625509740170298}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9930821747824666, "res": {"Yes": 0.9930821747824666, "No": 0.006917844820396276}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9621637543361858, "res": {"Yes": 0.9621637543361858, "No": 0.037835850496305945}, "ground_truth": 1}, {"key": "24037309", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996295752242045, "res": {"Yes": 0.9996295752242045, "No": 0.0003703583157489264}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.5453158777585867, "res": {"Yes": 0.5453158777585867, "No": 0.45468393489214465}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9917377297360056, "res": {"Yes": 0.9917377297360056, "No": 0.008262186640750598}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986228581313557, "res": {"Yes": 0.9986228581313557, "No": 0.001377171253315693}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.989213237894236, "res": {"Yes": 0.989213237894236, "No": 0.010786654536634879}, "ground_truth": 1}, {"key": "35605805", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980621762096654, "res": {"Yes": 0.9980621762096654, "No": 0.0019378063366422696}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9197756107927884, "res": {"Yes": 0.9197756107927884, "No": 0.08022420513353198}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9722163324854616, "res": {"Yes": 0.9722163324854616, "No": 0.027783560003289482}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972257237367412, "res": {"Yes": 0.9972257237367412, "No": 0.0027742895016717784}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996902072756787, "res": {"Yes": 0.9996902072756787, "No": 0.00030973979001996775}, "ground_truth": 1}, {"key": "17706248", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992896085475726, "res": {"Yes": 0.9992896085475726, "No": 0.000710385057806943}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992108135715456, "res": {"Yes": 0.9992108135715456, "No": 0.0007891319611205087}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7405204716085773, "res": {"Yes": 0.7405204716085773, "No": 0.2594795201278058}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996451742655459, "res": {"Yes": 0.9996451742655459, "No": 0.0003547004952270582}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999433054571963, "res": {"Yes": 0.9999433054571963, "No": 5.667757092532857e-05}, "ground_truth": 1}, {"key": "36883559", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958997520652915, "res": {"Yes": 0.9958997520652915, "No": 0.004100245618971549}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9618809842171531, "res": {"Yes": 0.9618809842171531, "No": 0.038118921226741306}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.19027809862100759, "res": {"No": 0.8097217895550487, "Yes": 0.19027809862100759}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9942017268489195, "res": {"Yes": 0.9942017268489195, "No": 0.005798192777907307}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956797676692799, "res": {"Yes": 0.9956797676692799, "No": 0.00432024701051839}, "ground_truth": 1}, {"key": "32799471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9940529349168694, "res": {"Yes": 0.9940529349168694, "No": 0.005947020150002563}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9820659686820921, "res": {"Yes": 0.9820659686820921, "No": 0.017933998680995263}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9754928583115988, "res": {"Yes": 0.9754928583115988, "No": 0.02450712224010078}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999726900318509, "res": {"Yes": 0.999726900318509, "No": 0.00027308457833768703}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999850252451228, "res": {"Yes": 0.9999850252451228, "No": 1.4890263723121254e-05}, "ground_truth": 1}, {"key": "34797243", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999012298380936, "res": {"Yes": 0.9999012298380936, "No": 9.866335076347488e-05}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998180483552087, "res": {"Yes": 0.9998180483552087, "No": 0.00018187807735803982}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8670363039588411, "res": {"Yes": 0.8670363039588411, "No": 0.1329633171108371}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966469523619423, "res": {"Yes": 0.9966469523619423, "No": 0.003353098953513073}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994960649679326, "res": {"Yes": 0.9994960649679326, "No": 0.0005038482129670664}, "ground_truth": 1}, {"key": "32154876", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999826391131764, "res": {"Yes": 0.999826391131764, "No": 0.00017351170445795058}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998511778251187, "res": {"Yes": 0.9998511778251187, "No": 0.00014878518554962237}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9931631846932183, "res": {"Yes": 0.9931631846932183, "No": 0.006836789697459596}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997680005260411, "res": {"Yes": 0.9997680005260411, "No": 0.00023187873892236614}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993873514158292, "res": {"Yes": 0.9993873514158292, "No": 0.0006125996101204102}, "ground_truth": 1}, {"key": "37962274", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998293707088336, "res": {"Yes": 0.9998293707088336, "No": 0.00017050959281138896}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991904643509335, "res": {"Yes": 0.9991904643509335, "No": 0.000809433449569074}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5268836988971429, "res": {"Yes": 0.5268836988971429, "No": 0.4731159656652258}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9726678169113505, "res": {"Yes": 0.9726678169113505, "No": 0.027332030121619062}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9534859874113412, "res": {"Yes": 0.9534859874113412, "No": 0.04651386680428517}, "ground_truth": 1}, {"key": "35574030", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8057506996222235, "res": {"Yes": 0.8057506996222235, "No": 0.19424889078407034}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9116887354611202, "res": {"Yes": 0.9116887354611202, "No": 0.08831096501292354}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.413483487008084, "res": {"No": 0.5865161764142468, "Yes": 0.413483487008084}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9607413182575755, "res": {"Yes": 0.9607413182575755, "No": 0.03925807984561148}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8856952758363563, "res": {"Yes": 0.8856952758363563, "No": 0.1143043672785632}, "ground_truth": 1}, {"key": "39105949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9955867730328135, "res": {"Yes": 0.9955867730328135, "No": 0.004413151142078024}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9798430815554686, "res": {"Yes": 0.9798430815554686, "No": 0.020156811661451206}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7923788544207392, "res": {"Yes": 0.7923788544207392, "No": 0.20762107821852108}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9573280847506154, "res": {"Yes": 0.9573280847506154, "No": 0.04267182980488006}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9676407757614301, "res": {"Yes": 0.9676407757614301, "No": 0.032359135572130014}, "ground_truth": 1}, {"key": "41064322", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.792118917563577, "res": {"Yes": 0.792118917563577, "No": 0.20788099550683697}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9844393674011337, "res": {"Yes": 0.9844393674011337, "No": 0.015560581336067593}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9767523071320902, "res": {"Yes": 0.9767523071320902, "No": 0.023247285650016346}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999689331225854, "res": {"Yes": 0.9999689331225854, "No": 3.100683171052088e-05}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990342170688264, "res": {"Yes": 0.9990342170688264, "No": 0.0009657799932013091}, "ground_truth": 1}, {"key": "28105101", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982370045055287, "res": {"Yes": 0.9982370045055287, "No": 0.0017629988991239136}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998389075234478, "res": {"Yes": 0.998389075234478, "No": 0.0016109401923925318}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8998496186570814, "res": {"Yes": 0.8998496186570814, "No": 0.10015019197474553}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992955609737547, "res": {"Yes": 0.9992955609737547, "No": 0.0007044038610680437}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991690399386889, "res": {"Yes": 0.9991690399386889, "No": 0.0008308867948549682}, "ground_truth": 1}, {"key": "36036068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999288298279897, "res": {"Yes": 0.999288298279897, "No": 0.0007116858256812955}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926251554120943, "res": {"Yes": 0.9926251554120943, "No": 0.007374757195074357}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.36340146196843287, "res": {"No": 0.6365983729378757, "Yes": 0.36340146196843287}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9777846123236769, "res": {"Yes": 0.9777846123236769, "No": 0.02221538971214987}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999633307373339, "res": {"Yes": 0.9999633307373339, "No": 3.662846088272338e-05}, "ground_truth": 1}, {"key": "37991460", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993180601267597, "res": {"Yes": 0.9993180601267597, "No": 0.000681945374070188}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9946545321322056, "res": {"Yes": 0.9946545321322056, "No": 0.005345404816258996}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.11059601735397384, "res": {"No": 0.8894038074455378, "Yes": 0.11059601735397384}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918669581099278, "res": {"Yes": 0.9918669581099278, "No": 0.008133011547221259}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971240247632117, "res": {"Yes": 0.9971240247632117, "No": 0.002875907506806056}, "ground_truth": 1}, {"key": "38437830", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9829237092193988, "res": {"Yes": 0.9829237092193988, "No": 0.017076278513307022}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8725095843827383, "res": {"Yes": 0.8725095843827383, "No": 0.12749045244140805}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.04268086438752658, "res": {"No": 0.9573190160246227, "Yes": 0.04268086438752658}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.1749976559009586, "res": {"No": 0.8250021306869026, "Yes": 0.1749976559009586}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987299791599461, "res": {"Yes": 0.9987299791599461, "No": 0.00126998854276146}, "ground_truth": 1}, {"key": "36507138", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989820008130994, "res": {"Yes": 0.9989820008130994, "No": 0.0010179572050487123}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9933434705015168, "res": {"Yes": 0.9933434705015168, "No": 0.006656483079812913}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6258390497285898, "res": {"Yes": 0.6258390497285898, "No": 0.37416060822171}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.823635912896119, "res": {"Yes": 0.823635912896119, "No": 0.17636379504215133}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963202545179423, "res": {"Yes": 0.9963202545179423, "No": 0.003679795864290856}, "ground_truth": 1}, {"key": "37824866", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9858097936866141, "res": {"Yes": 0.9858097936866141, "No": 0.014189855089766466}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986058538259639, "res": {"Yes": 0.9986058538259639, "No": 0.001394066867105147}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9567626145222333, "res": {"Yes": 0.9567626145222333, "No": 0.043237318606911775}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994965415777448, "res": {"Yes": 0.9994965415777448, "No": 0.0005033624856453149}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994890391849617, "res": {"Yes": 0.9994890391849617, "No": 0.0005108953925342767}, "ground_truth": 1}, {"key": "25088134", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999635045582071, "res": {"Yes": 0.999635045582071, "No": 0.000364920294303355}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996680461788657, "res": {"Yes": 0.9996680461788657, "No": 0.00033189231303222517}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9915755988300398, "res": {"Yes": 0.9915755988300398, "No": 0.008424346268479984}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.996005227716468, "res": {"Yes": 0.996005227716468, "No": 0.003994793862375196}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984689298855889, "res": {"Yes": 0.9984689298855889, "No": 0.0015310426577856597}, "ground_truth": 1}, {"key": "40172531", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996653053927408, "res": {"Yes": 0.9996653053927408, "No": 0.0003346079533396274}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7777912398905408, "res": {"Yes": 0.7777912398905408, "No": 0.22220851081775086}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.4522551487629283, "res": {"No": 0.547744632353769, "Yes": 0.4522551487629283}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997133185409849, "res": {"Yes": 0.9997133185409849, "No": 0.00028662552073681905}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995966987678144, "res": {"Yes": 0.9995966987678144, "No": 0.0004032632433341595}, "ground_truth": 1}, {"key": "37035874", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987891885244617, "res": {"Yes": 0.9987891885244617, "No": 0.001210757236288067}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983796917191731, "res": {"Yes": 0.9983796917191731, "No": 0.0016202864895044939}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9623691789807078, "res": {"Yes": 0.9623691789807078, "No": 0.037630725301788494}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9920714163502761, "res": {"Yes": 0.9920714163502761, "No": 0.00792847475834947}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997061720758822, "res": {"Yes": 0.9997061720758822, "No": 0.0002937665951423557}, "ground_truth": 1}, {"key": "36404465", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991721330111232, "res": {"Yes": 0.9991721330111232, "No": 0.0008278518870122603}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9865166221909918, "res": {"Yes": 0.9865166221909918, "No": 0.013483247365839198}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.009638748523508473, "res": {"No": 0.9903611325659882, "Yes": 0.009638748523508473}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.24610768375204856, "res": {"No": 0.7538921487487246, "Yes": 0.24610768375204856}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966924245194668, "res": {"Yes": 0.9966924245194668, "No": 0.003307517404028642}, "ground_truth": 1}, {"key": "39602052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9743150750229091, "res": {"Yes": 0.9743150750229091, "No": 0.025684841140382008}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.5905263624835075, "res": {"Yes": 0.5905263624835075, "No": 0.40947341435768736}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9901086474262794, "res": {"Yes": 0.9901086474262794, "No": 0.009891222809395387}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974265883155969, "res": {"Yes": 0.9974265883155969, "No": 0.002573372457596313}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998825168816821, "res": {"Yes": 0.9998825168816821, "No": 0.00011745973076670712}, "ground_truth": 1}, {"key": "33792789", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993061553216205, "res": {"Yes": 0.9993061553216205, "No": 0.00069376033202883}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9966248058170832, "res": {"Yes": 0.9966248058170832, "No": 0.003375175351524672}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9590021070199668, "res": {"Yes": 0.9590021070199668, "No": 0.040997823461165254}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9869465421212676, "res": {"Yes": 0.9869465421212676, "No": 0.013053426664842868}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9757202439050429, "res": {"Yes": 0.9757202439050429, "No": 0.024279713808248282}, "ground_truth": 1}, {"key": "32776626", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938573082487093, "res": {"Yes": 0.9938573082487093, "No": 0.006142668635862033}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9443267171869422, "res": {"Yes": 0.9443267171869422, "No": 0.05567312744956095}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8543159062099022, "res": {"Yes": 0.8543159062099022, "No": 0.14568363382354652}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9864553831129388, "res": {"Yes": 0.9864553831129388, "No": 0.013544577449254686}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948945961027261, "res": {"Yes": 0.9948945961027261, "No": 0.00510533759136281}, "ground_truth": 1}, {"key": "37195090", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9934967623778042, "res": {"Yes": 0.9934967623778042, "No": 0.006503212457099056}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9856538863961908, "res": {"Yes": 0.9856538863961908, "No": 0.014346030431884949}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9643414392391361, "res": {"Yes": 0.9643414392391361, "No": 0.03565846902593635}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997767004150644, "res": {"Yes": 0.9997767004150644, "No": 0.00022327329472010492}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9929512163366561, "res": {"Yes": 0.9929512163366561, "No": 0.0070487830082660505}, "ground_truth": 1}, {"key": "33981824", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9891987667996279, "res": {"Yes": 0.9891987667996279, "No": 0.01080111402185302}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988664994975569, "res": {"Yes": 0.9988664994975569, "No": 0.0011334439929006406}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9192122920360128, "res": {"Yes": 0.9192122920360128, "No": 0.08078725185294057}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.8820195567810701, "res": {"Yes": 0.8820195567810701, "No": 0.11797994813685377}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9813319582914822, "res": {"Yes": 0.9813319582914822, "No": 0.018667765734769703}, "ground_truth": 1}, {"key": "39569142", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.981181942747922, "res": {"Yes": 0.981181942747922, "No": 0.018818049310705947}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7961083613927313, "res": {"Yes": 0.7961083613927313, "No": 0.20389131433081226}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.20525946853422342, "res": {"No": 0.7947404465196465, "Yes": 0.20525946853422342}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952078541466278, "res": {"Yes": 0.9952078541466278, "No": 0.0047920978448646985}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982061216266351, "res": {"Yes": 0.9982061216266351, "No": 0.0017938870090882693}, "ground_truth": 1}, {"key": "40268210", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991006120919728, "res": {"Yes": 0.9991006120919728, "No": 0.0008993354106740372}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985968253713925, "res": {"Yes": 0.9985968253713925, "No": 0.0014030918684477657}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.06149685768671463, "res": {"No": 0.938502921513353, "Yes": 0.06149685768671463}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.2792222833090047, "res": {"No": 0.7207774917049554, "Yes": 0.2792222833090047}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9373006535136228, "res": {"Yes": 0.9373006535136228, "No": 0.06269924956971337}, "ground_truth": 1}, {"key": "34925159", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9803295189581128, "res": {"Yes": 0.9803295189581128, "No": 0.019670520211130478}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9325435837038232, "res": {"Yes": 0.9325435837038232, "No": 0.06745614472722548}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9623792290545203, "res": {"Yes": 0.9623792290545203, "No": 0.03762064161868152}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973532953152336, "res": {"Yes": 0.9973532953152336, "No": 0.0026466952306828514}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9946777585813648, "res": {"Yes": 0.9946777585813648, "No": 0.005322274124262097}, "ground_truth": 1}, {"key": "36181903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9784761269910933, "res": {"Yes": 0.9784761269910933, "No": 0.02152379955556158}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.5118832742062259, "res": {"Yes": 0.5118832742062259, "No": 0.488116471743985}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9150782978013873, "res": {"Yes": 0.9150782978013873, "No": 0.0849216000902047}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.44686609187423554, "res": {"No": 0.5531337190997514, "Yes": 0.44686609187423554}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992610424636994, "res": {"Yes": 0.9992610424636994, "No": 0.0007389386190041574}, "ground_truth": 1}, {"key": "38620559", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997282111812746, "res": {"Yes": 0.9997282111812746, "No": 0.0002716997395843616}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992024832356782, "res": {"Yes": 0.9992024832356782, "No": 0.0007974319947356295}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.13419600820582875, "res": {"No": 0.8658038735755768, "Yes": 0.13419600820582875}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.7873088404773515, "res": {"Yes": 0.7873088404773515, "No": 0.2126909087105405}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9787890741214323, "res": {"Yes": 0.9787890741214323, "No": 0.021210952571091648}, "ground_truth": 1}, {"key": "32719657", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9680376285423202, "res": {"Yes": 0.9680376285423202, "No": 0.03196232223673027}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7071401222552652, "res": {"Yes": 0.7071401222552652, "No": 0.29285988281356634}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.1821588446040713, "res": {"No": 0.8178411454130203, "Yes": 0.1821588446040713}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990762232846645, "res": {"Yes": 0.9990762232846645, "No": 0.0009236830173457248}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998831128318374, "res": {"Yes": 0.9998831128318374, "No": 0.00011685952331009885}, "ground_truth": 1}, {"key": "37530914", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976161353291574, "res": {"Yes": 0.9976161353291574, "No": 0.0023838143646210436}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972361588610339, "res": {"Yes": 0.9972361588610339, "No": 0.0027637978845741658}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7116639068198044, "res": {"Yes": 0.7116639068198044, "No": 0.28833565751649654}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9409118346858526, "res": {"Yes": 0.9409118346858526, "No": 0.05908772942910007}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.986459907007699, "res": {"Yes": 0.986459907007699, "No": 0.013539937871825568}, "ground_truth": 1}, {"key": "33306933", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9863156221513047, "res": {"Yes": 0.9863156221513047, "No": 0.013684277950407832}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8799004525596307, "res": {"Yes": 0.8799004525596307, "No": 0.12009936946418868}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9886258810062017, "res": {"Yes": 0.9886258810062017, "No": 0.011373968495172925}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.8232245600957299, "res": {"Yes": 0.8232245600957299, "No": 0.1767753962401628}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957411099044076, "res": {"Yes": 0.9957411099044076, "No": 0.004258919454728514}, "ground_truth": 1}, {"key": "33837212", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999036136754603, "res": {"Yes": 0.9999036136754603, "No": 9.631157972728101e-05}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.3187369259430378, "res": {"No": 0.6812630549220258, "Yes": 0.3187369259430378}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9525455610517992, "res": {"Yes": 0.9525455610517992, "No": 0.04745430646920973}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997329780567067, "res": {"Yes": 0.9997329780567067, "No": 0.0002669383033129347}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9782740309003796, "res": {"Yes": 0.9782740309003796, "No": 0.02172588656821089}, "ground_truth": 1}, {"key": "40945179", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971696630773884, "res": {"Yes": 0.9971696630773884, "No": 0.0028303488680369577}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998359846524251, "res": {"Yes": 0.998359846524251, "No": 0.0016401419270603813}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6793947436762244, "res": {"Yes": 0.6793947436762244, "No": 0.3206052062036942}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994117552138446, "res": {"Yes": 0.9994117552138446, "No": 0.0005881485534808627}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998928864370674, "res": {"Yes": 0.9998928864370674, "No": 0.00010709599244766285}, "ground_truth": 1}, {"key": "34152358", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999126723232835, "res": {"Yes": 0.9999126723232835, "No": 8.727565499899166e-05}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998737042159841, "res": {"Yes": 0.9998737042159841, "No": 0.00012627758735202942}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.10015607096356108, "res": {"No": 0.899843936124682, "Yes": 0.10015607096356108}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974877874010846, "res": {"Yes": 0.9974877874010846, "No": 0.0025122286787131734}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998204320042628, "res": {"Yes": 0.9998204320042628, "No": 0.00017950814051603635}, "ground_truth": 1}, {"key": "34136541", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987896646473814, "res": {"Yes": 0.9987896646473814, "No": 0.0012103513374394847}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998549508680883, "res": {"Yes": 0.998549508680883, "No": 0.0014505144389331493}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9290410071213733, "res": {"Yes": 0.9290410071213733, "No": 0.07095890303647749}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9712299538985203, "res": {"Yes": 0.9712299538985203, "No": 0.028769896469001716}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985364445434116, "res": {"Yes": 0.9985364445434116, "No": 0.0014634948072658328}, "ground_truth": 1}, {"key": "37469603", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985760162284048, "res": {"Yes": 0.9985760162284048, "No": 0.0014239408568013465}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9504011760007458, "res": {"Yes": 0.9504011760007458, "No": 0.04959872754078465}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.023384714419547054, "res": {"No": 0.9766150576811621, "Yes": 0.023384714419547054}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.33968624372469913, "res": {"No": 0.6603132073084824, "Yes": 0.33968624372469913}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9562759700384741, "res": {"Yes": 0.9562759700384741, "No": 0.04372356838287451}, "ground_truth": 1}, {"key": "37353611", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5201113789779129, "res": {"Yes": 0.5201113789779129, "No": 0.47988806509200854}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.01533875725022262, "res": {"No": 0.9846610113079255, "Yes": 0.01533875725022262}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.31545498041606546, "res": {"No": 0.6845449144301093, "Yes": 0.31545498041606546}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9833428739907247, "res": {"Yes": 0.9833428739907247, "No": 0.01665710045012761}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9730326873126788, "res": {"Yes": 0.9730326873126788, "No": 0.02696719909061972}, "ground_truth": 1}, {"key": "37211649", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.990848837108322, "res": {"Yes": 0.990848837108322, "No": 0.00915112917687518}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9774845039351863, "res": {"Yes": 0.9774845039351863, "No": 0.02251549815503285}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9972445770656001, "res": {"Yes": 0.9972445770656001, "No": 0.0027554089725718013}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998641691758419, "res": {"Yes": 0.9998641691758419, "No": 0.00013581987481186113}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997054570662895, "res": {"Yes": 0.9997054570662895, "No": 0.000294529320369459}, "ground_truth": 1}, {"key": "37320976", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999273332003598, "res": {"Yes": 0.9999273332003598, "No": 7.26519011262951e-05}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989257278439243, "res": {"Yes": 0.9989257278439243, "No": 0.001074239295484209}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8947493149224, "res": {"Yes": 0.8947493149224, "No": 0.10525036905131481}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9639865989233628, "res": {"Yes": 0.9639865989233628, "No": 0.03601328791067494}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982080219134376, "res": {"Yes": 0.9982080219134376, "No": 0.001791893494622617}, "ground_truth": 1}, {"key": "34492412", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958984497280372, "res": {"Yes": 0.9958984497280372, "No": 0.004101524685040089}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9901090008951297, "res": {"Yes": 0.9901090008951297, "No": 0.00989096366275262}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7674180119866897, "res": {"Yes": 0.7674180119866897, "No": 0.23258161111450787}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9919835475161213, "res": {"Yes": 0.9919835475161213, "No": 0.008016361807436222}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9917750118414919, "res": {"Yes": 0.9917750118414919, "No": 0.008224950504818945}, "ground_truth": 1}, {"key": "36655016", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9838593196363487, "res": {"Yes": 0.9838593196363487, "No": 0.016140403765555304}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7324294183023432, "res": {"Yes": 0.7324294183023432, "No": 0.26757028486478246}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.18243690349745134, "res": {"No": 0.8175630367575022, "Yes": 0.18243690349745134}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9818980271752696, "res": {"Yes": 0.9818980271752696, "No": 0.018101997277045912}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997321438498659, "res": {"Yes": 0.9997321438498659, "No": 0.0002678337783266846}, "ground_truth": 1}, {"key": "35220773", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977244659619081, "res": {"Yes": 0.9977244659619081, "No": 0.0022755138134819206}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989610608713564, "res": {"Yes": 0.9989610608713564, "No": 0.00103886448881055}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9964359932028529, "res": {"Yes": 0.9964359932028529, "No": 0.0035640287593634527}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997394133585984, "res": {"Yes": 0.9997394133585984, "No": 0.0002605517656514708}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984434999047163, "res": {"Yes": 0.9984434999047163, "No": 0.0015564308432569785}, "ground_truth": 1}, {"key": "31569808", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992143868257488, "res": {"Yes": 0.9992143868257488, "No": 0.0007855157889196643}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945159704935578, "res": {"Yes": 0.9945159704935578, "No": 0.005483964448890101}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7775233525077951, "res": {"Yes": 0.7775233525077951, "No": 0.22247627773081263}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9951967536601302, "res": {"Yes": 0.9951967536601302, "No": 0.004803209205462331}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996795584134893, "res": {"Yes": 0.996795584134893, "No": 0.003204436574023263}, "ground_truth": 1}, {"key": "37696256", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938783875894422, "res": {"Yes": 0.9938783875894422, "No": 0.006121537429721967}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9953339701986763, "res": {"Yes": 0.9953339701986763, "No": 0.004666028487803569}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.003733366147047878, "res": {"No": 0.9962664076530315, "Yes": 0.003733366147047878}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9668335622254746, "res": {"Yes": 0.9668335622254746, "No": 0.03316599416909368}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988530594396805, "res": {"Yes": 0.9988530594396805, "No": 0.001146876292209264}, "ground_truth": 1}, {"key": "36874328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988994453549686, "res": {"Yes": 0.9988994453549686, "No": 0.0011004996225260973}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.44384601045791694, "res": {"No": 0.5561536077256621, "Yes": 0.44384601045791694}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9997925474212627, "res": {"Yes": 0.9997925474212627, "No": 0.00020736790985666245}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995018993933162, "res": {"Yes": 0.9995018993933162, "No": 0.0004980759412975842}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995429756974372, "res": {"Yes": 0.9995429756974372, "No": 0.00045694295491164846}, "ground_truth": 1}, {"key": "24532377", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994696264977764, "res": {"Yes": 0.9994696264977764, "No": 0.0005303155241346012}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957872088061568, "res": {"Yes": 0.9957872088061568, "No": 0.004212773751061182}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.017359910939087286, "res": {"No": 0.9826400085993756, "Yes": 0.017359910939087286}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996622108036213, "res": {"Yes": 0.9996622108036213, "No": 0.00033777576493229585}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999016856003053, "res": {"Yes": 0.999016856003053, "No": 0.0009831218384242547}, "ground_truth": 1}, {"key": "39560618", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998933632061071, "res": {"Yes": 0.9998933632061071, "No": 0.00010661866156165802}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992486654932272, "res": {"Yes": 0.9992486654932272, "No": 0.0007513278877136084}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.579367724637986, "res": {"Yes": 0.579367724637986, "No": 0.4206320154875393}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998350915388954, "res": {"Yes": 0.9998350915388954, "No": 0.00016481762473495332}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.960654630636608, "res": {"Yes": 0.960654630636608, "No": 0.03934523516557856}, "ground_truth": 1}, {"key": "34922693", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986171515173381, "res": {"Yes": 0.9986171515173381, "No": 0.0013827643932433984}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999039712510555, "res": {"Yes": 0.9999039712510555, "No": 9.591376993407409e-05}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.04968510942133688, "res": {"No": 0.9503147152859847, "Yes": 0.04968510942133688}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9748351258265072, "res": {"Yes": 0.9748351258265072, "No": 0.025164797139557375}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990342170688264, "res": {"Yes": 0.9990342170688264, "No": 0.000965702637321998}, "ground_truth": 1}, {"key": "33629577", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992667599919336, "res": {"Yes": 0.9992667599919336, "No": 0.0007332007639938624}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9954604699556003, "res": {"Yes": 0.9954604699556003, "No": 0.004539499569439621}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8832682152254743, "res": {"Yes": 0.8832682152254743, "No": 0.1167317513503974}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991432202455545, "res": {"Yes": 0.9991432202455545, "No": 0.0008567229247371165}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998449838462458, "res": {"Yes": 0.9998449838462458, "No": 0.00015491948927882337}, "ground_truth": 1}, {"key": "32284359", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996670928458881, "res": {"Yes": 0.9996670928458881, "No": 0.00033279828575734577}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989734342794782, "res": {"Yes": 0.9989734342794782, "No": 0.0010265393377789128}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9968004408338492, "res": {"Yes": 0.9968004408338492, "No": 0.003199542014141388}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997141527421145, "res": {"Yes": 0.9997141527421145, "No": 0.00028578293609692615}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984739251382936, "res": {"Yes": 0.9984739251382936, "No": 0.0015260356265947937}, "ground_truth": 1}, {"key": "28082962", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999642843338196, "res": {"Yes": 0.9999642843338196, "No": 3.5627847241524106e-05}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998162643014713, "res": {"Yes": 0.9998162643014713, "No": 0.00018369477670128538}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.705788811564256, "res": {"Yes": 0.705788811564256, "No": 0.2942109358569602}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944365048808752, "res": {"Yes": 0.9944365048808752, "No": 0.005563465909015577}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997639485445299, "res": {"Yes": 0.9997639485445299, "No": 0.00023596713998140837}, "ground_truth": 1}, {"key": "24796803", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958083655259421, "res": {"Yes": 0.9958083655259421, "No": 0.00419162412844264}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958322492801936, "res": {"Yes": 0.9958322492801936, "No": 0.004167759407189739}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.010586020172132789, "res": {"No": 0.9894137964763206, "Yes": 0.010586020172132789}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981081392288725, "res": {"Yes": 0.9981081392288725, "No": 0.0018918569860082368}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9736877489811655, "res": {"Yes": 0.9736877489811655, "No": 0.026312020262757474}, "ground_truth": 1}, {"key": "35466150", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943405606072958, "res": {"Yes": 0.9943405606072958, "No": 0.0056593779252892265}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9912217508015783, "res": {"Yes": 0.9912217508015783, "No": 0.0087781772095606}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9663972745640964, "res": {"Yes": 0.9663972745640964, "No": 0.03360263474693263}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9765711686100716, "res": {"Yes": 0.9765711686100716, "No": 0.0234287588628888}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9869765003512809, "res": {"Yes": 0.9869765003512809, "No": 0.013023380980369918}, "ground_truth": 1}, {"key": "35754289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983944235192402, "res": {"Yes": 0.9983944235192402, "No": 0.0016055562648257984}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9919327503312841, "res": {"Yes": 0.9919327503312841, "No": 0.008067178387763566}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7179324680486291, "res": {"Yes": 0.7179324680486291, "No": 0.2820670134295643}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.5941011071779358, "res": {"Yes": 0.5941011071779358, "No": 0.40589819583423364}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7555562847834938, "res": {"Yes": 0.7555562847834938, "No": 0.2444432019296343}, "ground_truth": 1}, {"key": "36678662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4293376528716963, "res": {"No": 0.5706620505333146, "Yes": 0.4293376528716963}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.6934482940317409, "res": {"Yes": 0.6934482940317409, "No": 0.3065512276657563}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9481561045845535, "res": {"Yes": 0.9481561045845535, "No": 0.05184378067032671}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.8916831558045855, "res": {"Yes": 0.8916831558045855, "No": 0.10831671468452678}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9814590656938313, "res": {"Yes": 0.9814590656938313, "No": 0.01854094035122226}, "ground_truth": 1}, {"key": "35399671", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8432753864574265, "res": {"Yes": 0.8432753864574265, "No": 0.1567244987663709}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9540974079278608, "res": {"Yes": 0.9540974079278608, "No": 0.04590257790431712}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9925791121724836, "res": {"Yes": 0.9925791121724836, "No": 0.007420862415891926}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918450217269276, "res": {"Yes": 0.9918450217269276, "No": 0.008154861716096645}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987168982798353, "res": {"Yes": 0.9987168982798353, "No": 0.0012830705584064063}, "ground_truth": 1}, {"key": "36888180", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898353805031398, "res": {"Yes": 0.9898353805031398, "No": 0.010164551041196977}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968190415029808, "res": {"Yes": 0.9968190415029808, "No": 0.0031809693550779406}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8852682080005075, "res": {"Yes": 0.8852682080005075, "No": 0.11473141545236994}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936674158639673, "res": {"Yes": 0.9936674158639673, "No": 0.006332580271838834}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986586456210642, "res": {"Yes": 0.9986586456210642, "No": 0.0013413764375751376}, "ground_truth": 1}, {"key": "28061069", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995320213361899, "res": {"Yes": 0.9995320213361899, "No": 0.00046793460720685536}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963092458377418, "res": {"Yes": 0.9963092458377418, "No": 0.0036908053230965787}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9883356245085284, "res": {"Yes": 0.9883356245085284, "No": 0.011664234492098564}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985912344431378, "res": {"Yes": 0.9985912344431378, "No": 0.0014087425232345845}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999023640249656, "res": {"Yes": 0.999023640249656, "No": 0.0009763709226995271}, "ground_truth": 1}, {"key": "22259982", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972682903277076, "res": {"Yes": 0.9972682903277076, "No": 0.0027316859398844075}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972793133944942, "res": {"Yes": 0.9972793133944942, "No": 0.0027207355838398746}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9478657733927245, "res": {"Yes": 0.9478657733927245, "No": 0.05213406005303346}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9586702509452251, "res": {"Yes": 0.9586702509452251, "No": 0.04132965904785599}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9836573166715252, "res": {"Yes": 0.9836573166715252, "No": 0.016342454281550685}, "ground_truth": 1}, {"key": "34026805", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977465438531599, "res": {"Yes": 0.9977465438531599, "No": 0.00225345924769944}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9079641622930611, "res": {"Yes": 0.9079641622930611, "No": 0.0920357123074044}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9997034311852276, "res": {"Yes": 0.9997034311852276, "No": 0.000296535682816589}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999021833683587, "res": {"Yes": 0.9999021833683587, "No": 9.77665562538442e-05}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999818286721858, "res": {"Yes": 0.999818286721858, "No": 0.00018163543212649608}, "ground_truth": 1}, {"key": "36713809", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995932432479673, "res": {"Yes": 0.9995932432479673, "No": 0.0004067148224723817}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997234480175513, "res": {"Yes": 0.9997234480175513, "No": 0.00027653255619751796}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.001426074092592584, "res": {"No": 0.9985738772808688, "Yes": 0.001426074092592584}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984052390847726, "res": {"Yes": 0.9984052390847726, "No": 0.0015947568492143214}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995933624294768, "res": {"Yes": 0.9995933624294768, "No": 0.00040653952550039096}, "ground_truth": 1}, {"key": "39726411", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997641868982813, "res": {"Yes": 0.9997641868982813, "No": 0.0002357433133880827}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.996912145559379, "res": {"Yes": 0.996912145559379, "No": 0.0030878269126300424}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.03377286870046255, "res": {"No": 0.9662267417539631, "Yes": 0.03377286870046255}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999665782063262, "res": {"Yes": 0.999665782063262, "No": 0.0003341882837559323}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9936449353866509, "res": {"Yes": 0.9936449353866509, "No": 0.0063548225349132}, "ground_truth": 1}, {"key": "37069841", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960390528275528, "res": {"Yes": 0.9960390528275528, "No": 0.003960650498136266}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.11779348675474344, "res": {"No": 0.8822060177813513, "Yes": 0.11779348675474344}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9489962138507791, "res": {"Yes": 0.9489962138507791, "No": 0.05100364388931796}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988314114320285, "res": {"Yes": 0.9988314114320285, "No": 0.0011685437073743045}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995243995837504, "res": {"Yes": 0.9995243995837504, "No": 0.000475542928165595}, "ground_truth": 1}, {"key": "38894693", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990368370694672, "res": {"Yes": 0.9990368370694672, "No": 0.0009630781469286196}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.817658345635667, "res": {"Yes": 0.817658345635667, "No": 0.18234127689965082}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.936381930805681, "res": {"Yes": 0.936381930805681, "No": 0.06361763927490205}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9871683665032139, "res": {"Yes": 0.9871683665032139, "No": 0.012831207364355229}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9936033830179433, "res": {"Yes": 0.9936033830179433, "No": 0.006396557653239577}, "ground_truth": 1}, {"key": "33946032", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967002403130967, "res": {"Yes": 0.9967002403130967, "No": 0.0032997796020422326}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.6663479682728952, "res": {"Yes": 0.6663479682728952, "No": 0.33365141455638936}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9702424521956692, "res": {"Yes": 0.9702424521956692, "No": 0.029757219623841903}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989447617558652, "res": {"Yes": 0.9989447617558652, "No": 0.0010551588021676034}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970455685477765, "res": {"Yes": 0.9970455685477765, "No": 0.0029544623220292623}, "ground_truth": 1}, {"key": "39035311", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984738061202088, "res": {"Yes": 0.9984738061202088, "No": 0.0015261928163594672}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9880440185880139, "res": {"Yes": 0.9880440185880139, "No": 0.011955868897923485}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9936358729887086, "res": {"Yes": 0.9936358729887086, "No": 0.0063640806228866635}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970530345526514, "res": {"Yes": 0.9970530345526514, "No": 0.00294694275574871}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9855671526705279, "res": {"Yes": 0.9855671526705279, "No": 0.014432817100362217}, "ground_truth": 1}, {"key": "27680038", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985039940342159, "res": {"Yes": 0.9985039940342159, "No": 0.001496027525800287}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951027876382562, "res": {"Yes": 0.9951027876382562, "No": 0.004897201791222303}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7540465164293396, "res": {"Yes": 0.7540465164293396, "No": 0.24595347179095026}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973229372405807, "res": {"Yes": 0.9973229372405807, "No": 0.002677056256257472}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995818080866243, "res": {"Yes": 0.9995818080866243, "No": 0.000418077055135111}, "ground_truth": 1}, {"key": "36901907", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999741779333953, "res": {"Yes": 0.9999741779333953, "No": 2.579579664619078e-05}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999048055962748, "res": {"Yes": 0.9999048055962748, "No": 9.510471597499753e-05}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9736127165733142, "res": {"Yes": 0.9736127165733142, "No": 0.026387179810943257}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9923608225724139, "res": {"Yes": 0.9923608225724139, "No": 0.007639136543936963}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993397259768918, "res": {"Yes": 0.9993397259768918, "No": 0.0006601948098879295}, "ground_truth": 1}, {"key": "21530542", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977340774870743, "res": {"Yes": 0.9977340774870743, "No": 0.0022658869665280303}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987280743839463, "res": {"Yes": 0.9987280743839463, "No": 0.0012718429355874934}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8397530822994117, "res": {"Yes": 0.8397530822994117, "No": 0.1602464558190057}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9901045558107482, "res": {"Yes": 0.9901045558107482, "No": 0.009895293885147845}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9888977825981392, "res": {"Yes": 0.9888977825981392, "No": 0.01110206668674097}, "ground_truth": 1}, {"key": "38192532", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9808837548727234, "res": {"Yes": 0.9808837548727234, "No": 0.019116237006726395}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9595519175837935, "res": {"Yes": 0.9595519175837935, "No": 0.04044779292528493}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9694834995407148, "res": {"Yes": 0.9694834995407148, "No": 0.030516379705448057}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971771260227905, "res": {"Yes": 0.9971771260227905, "No": 0.0028229086201525502}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9901306171907998, "res": {"Yes": 0.9901306171907998, "No": 0.00986932023245727}, "ground_truth": 1}, {"key": "34102400", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994546033176679, "res": {"Yes": 0.994546033176679, "No": 0.005453983608348003}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9907457372197059, "res": {"Yes": 0.9907457372197059, "No": 0.009254221992216904}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8713474259274067, "res": {"Yes": 0.8713474259274067, "No": 0.12865253960879267}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.99154887622774, "res": {"Yes": 0.99154887622774, "No": 0.008451025365036727}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994972528197369, "res": {"Yes": 0.9994972528197369, "No": 0.0005026949666236889}, "ground_truth": 1}, {"key": "36133399", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9923492094386037, "res": {"Yes": 0.9923492094386037, "No": 0.0076507698669848944}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9452113676762596, "res": {"Yes": 0.9452113676762596, "No": 0.0547885690919143}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.36516199570901803, "res": {"No": 0.6348375624366897, "Yes": 0.36516199570901803}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993484184715007, "res": {"Yes": 0.9993484184715007, "No": 0.0006514642736312935}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9941778000815592, "res": {"Yes": 0.9941778000815592, "No": 0.005822188831823219}, "ground_truth": 1}, {"key": "34314544", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9870177094543988, "res": {"Yes": 0.9870177094543988, "No": 0.012982210262989253}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974640616382402, "res": {"Yes": 0.9974640616382402, "No": 0.002535921999944106}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.00998581193447148, "res": {"No": 0.990014002513991, "Yes": 0.00998581193447148}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961740050410545, "res": {"Yes": 0.9961740050410545, "No": 0.0038259365193971913}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9932877137107154, "res": {"Yes": 0.9932877137107154, "No": 0.006712230291443869}, "ground_truth": 1}, {"key": "33460074", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.989074784439787, "res": {"Yes": 0.989074784439787, "No": 0.010925060502822815}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973337302280755, "res": {"Yes": 0.9973337302280755, "No": 0.002666232104666552}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9105255943458034, "res": {"Yes": 0.9105255943458034, "No": 0.0894741519894996}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966193664530615, "res": {"Yes": 0.9966193664530615, "No": 0.003380619507633459}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9848352240817206, "res": {"Yes": 0.9848352240817206, "No": 0.0151647048592398}, "ground_truth": 1}, {"key": "36191495", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9855228149728031, "res": {"Yes": 0.9855228149728031, "No": 0.014477102726409173}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.996993191131688, "res": {"Yes": 0.996993191131688, "No": 0.0030067894421243113}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.154462612842903, "res": {"No": 0.845536949701438, "Yes": 0.154462612842903}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9694335355876476, "res": {"Yes": 0.9694335355876476, "No": 0.03056595411669331}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996034832935363, "res": {"Yes": 0.9996034832935363, "No": 0.000396470627134666}, "ground_truth": 1}, {"key": "39532668", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975980999891205, "res": {"Yes": 0.9975980999891205, "No": 0.0024018898836132207}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9858268296128573, "res": {"Yes": 0.9858268296128573, "No": 0.01417278645414588}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0003658789562102841, "res": {"No": 0.9996340923205497, "Yes": 0.0003658789562102841}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9806180812196603, "res": {"Yes": 0.9806180812196603, "No": 0.019381974389994575}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997962419714873, "res": {"Yes": 0.9997962419714873, "No": 0.00020363263425654768}, "ground_truth": 1}, {"key": "20328247", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6897041096206553, "res": {"Yes": 0.6897041096206553, "No": 0.31029581331412154}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9387175910641331, "res": {"Yes": 0.9387175910641331, "No": 0.06128211134990337}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9902459889808033, "res": {"Yes": 0.9902459889808033, "No": 0.009753949490936746}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999810662776555, "res": {"Yes": 0.999810662776555, "No": 0.00018928854764892357}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.7875772170373933e-06}, "ground_truth": 1}, {"key": "39112675", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999948788531352, "res": {"Yes": 0.999948788531352, "No": 5.109760146085405e-05}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9778388730528882, "res": {"Yes": 0.9778388730528882, "No": 0.022161099542281498}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0047001681232576725, "res": {"No": 0.9952998348101914, "Yes": 0.0047001681232576725}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9948387906473627, "res": {"Yes": 0.9948387906473627, "No": 0.005161247320093758}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974551659941664, "res": {"Yes": 0.9974551659941664, "No": 0.002544852977000086}, "ground_truth": 1}, {"key": "31620300", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962697221372566, "res": {"Yes": 0.9962697221372566, "No": 0.0037302892552825723}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.5725490902890754, "res": {"Yes": 0.5725490902890754, "No": 0.42745081921293937}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9800746762161966, "res": {"Yes": 0.9800746762161966, "No": 0.019925319884611988}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.99378184077048, "res": {"Yes": 0.99378184077048, "No": 0.006218186339746827}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993949648076534, "res": {"Yes": 0.9993949648076534, "No": 0.0006049177860462942}, "ground_truth": 1}, {"key": "37518509", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910200963480424, "res": {"Yes": 0.9910200963480424, "No": 0.008979791636496271}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9817484100807421, "res": {"Yes": 0.9817484100807421, "No": 0.018251627953508664}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.04023616745091382, "res": {"No": 0.9597634606852989, "Yes": 0.04023616745091382}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998704861276457, "res": {"Yes": 0.9998704861276457, "No": 0.0001294291410419401}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997547830705872, "res": {"Yes": 0.9997547830705872, "No": 0.00024519371165746}, "ground_truth": 1}, {"key": "35454095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9928481520587593, "res": {"Yes": 0.9928481520587593, "No": 0.0071515368594217265}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985970635367637, "res": {"Yes": 0.9985970635367637, "No": 0.0014028786146914579}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9027964466211823, "res": {"Yes": 0.9027964466211823, "No": 0.09720305869065474}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9863338316784125, "res": {"Yes": 0.9863338316784125, "No": 0.013665744210727306}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9841530631944162, "res": {"Yes": 0.9841530631944162, "No": 0.015846541569108176}, "ground_truth": 1}, {"key": "38542788", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9934807656191221, "res": {"Yes": 0.9934807656191221, "No": 0.006519177899677379}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8683313044425505, "res": {"Yes": 0.8683313044425505, "No": 0.13166827337177123}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.039098409131205804, "res": {"No": 0.960901348815234, "Yes": 0.039098409131205804}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9728661222667833, "res": {"Yes": 0.9728661222667833, "No": 0.027133658789898072}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995461415643496, "res": {"Yes": 0.995461415643496, "No": 0.004538603106803258}, "ground_truth": 1}, {"key": "23944937", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9878700670160231, "res": {"Yes": 0.9878700670160231, "No": 0.01212983207375993}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9840420077876538, "res": {"Yes": 0.9840420077876538, "No": 0.015957947467883198}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.12823399474872285, "res": {"No": 0.8717658689813782, "Yes": 0.12823399474872285}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9938885105905362, "res": {"Yes": 0.9938885105905362, "No": 0.0061115185591072065}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9336838858703735, "res": {"Yes": 0.9336838858703735, "No": 0.06631574028085233}, "ground_truth": 1}, {"key": "31753944", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9923549581342251, "res": {"Yes": 0.9923549581342251, "No": 0.0076450415858636015}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9879283550452554, "res": {"Yes": 0.9879283550452554, "No": 0.012071554038577776}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9420734924369363, "res": {"Yes": 0.9420734924369363, "No": 0.05792641388910017}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997882569807182, "res": {"Yes": 0.9997882569807182, "No": 0.00021161922642201907}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9951828210031077, "res": {"Yes": 0.9951828210031077, "No": 0.004817177997864743}, "ground_truth": 1}, {"key": "35527214", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998322659909919, "res": {"Yes": 0.998322659909919, "No": 0.0016772965133390017}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9910688012274683, "res": {"Yes": 0.9910688012274683, "No": 0.008931142077982249}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.17924580550988328, "res": {"No": 0.8207539417437724, "Yes": 0.17924580550988328}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.059990413771481906, "res": {"No": 0.9400094204622067, "Yes": 0.059990413771481906}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9418035893692726, "res": {"Yes": 0.9418035893692726, "No": 0.05819637566389947}, "ground_truth": 1}, {"key": "40400404", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943541115727919, "res": {"Yes": 0.9943541115727919, "No": 0.00564586872723175}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9699572603017835, "res": {"Yes": 0.9699572603017835, "No": 0.030042702282205673}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6131277275922682, "res": {"Yes": 0.6131277275922682, "No": 0.38687186846972915}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946374308854521, "res": {"Yes": 0.9946374308854521, "No": 0.005362572365967187}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99050591512868, "res": {"Yes": 0.99050591512868, "No": 0.009493992963553091}, "ground_truth": 1}, {"key": "21713119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995385745095009, "res": {"Yes": 0.9995385745095009, "No": 0.00046140055932077887}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990300526633142, "res": {"Yes": 0.9990300526633142, "No": 0.0009698516852127986}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9490084559808994, "res": {"Yes": 0.9490084559808994, "No": 0.05099146567329404}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9903512042454001, "res": {"Yes": 0.9903512042454001, "No": 0.009648768387642425}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9533322311246227, "res": {"Yes": 0.9533322311246227, "No": 0.04666773278075498}, "ground_truth": 1}, {"key": "28730678", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9913873918472338, "res": {"Yes": 0.9913873918472338, "No": 0.008612493477259796}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983407155371563, "res": {"Yes": 0.9983407155371563, "No": 0.0016592907947195101}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8236247527051097, "res": {"Yes": 0.8236247527051097, "No": 0.17637485339877634}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9208087675075067, "res": {"Yes": 0.9208087675075067, "No": 0.07919112852571099}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995616786109003, "res": {"Yes": 0.9995616786109003, "No": 0.0004382342382553479}, "ground_truth": 1}, {"key": "36823733", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991307256380533, "res": {"Yes": 0.9991307256380533, "No": 0.000869231560861651}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7428241781647554, "res": {"Yes": 0.7428241781647554, "No": 0.25717579517733935}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9572633007219299, "res": {"Yes": 0.9572633007219299, "No": 0.04273638131855581}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979776467176708, "res": {"Yes": 0.9979776467176708, "No": 0.0020222915092634073}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978035098227516, "res": {"Yes": 0.9978035098227516, "No": 0.002196502404176964}, "ground_truth": 1}, {"key": "35988862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975873077319725, "res": {"Yes": 0.9975873077319725, "No": 0.0024127092325347973}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.98624998700325, "res": {"Yes": 0.98624998700325, "No": 0.01374997325448242}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.21791678701578018, "res": {"No": 0.7820828326042705, "Yes": 0.21791678701578018}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.600831590012624, "res": {"Yes": 0.600831590012624, "No": 0.39916822675881497}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.970934868175302, "res": {"Yes": 0.970934868175302, "No": 0.02906490053476494}, "ground_truth": 1}, {"key": "40499665", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45559478222946903, "res": {"No": 0.5444046863268777, "Yes": 0.45559478222946903}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7715232434686663, "res": {"Yes": 0.7715232434686663, "No": 0.2284763214629806}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7092352940616475, "res": {"Yes": 0.7092352940616475, "No": 0.2907643888708952}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9892877808878349, "res": {"Yes": 0.9892877808878349, "No": 0.010712096093426072}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997153444320862, "res": {"Yes": 0.9997153444320862, "No": 0.0002845723588013898}, "ground_truth": 1}, {"key": "32829820", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946472208514728, "res": {"Yes": 0.9946472208514728, "No": 0.005352669877666817}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945044176664374, "res": {"Yes": 0.9945044176664374, "No": 0.005495606083162586}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9888084921264555, "res": {"Yes": 0.9888084921264555, "No": 0.011191369254773428}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992448575137062, "res": {"Yes": 0.9992448575137062, "No": 0.0007551380331964647}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998863309608148, "res": {"Yes": 0.9998863309608148, "No": 0.00011360682207536898}, "ground_truth": 1}, {"key": "20583553", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999493845180067, "res": {"Yes": 0.9999493845180067, "No": 5.0498294223798784e-05}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999717012808431, "res": {"Yes": 0.999717012808431, "No": 0.0002829676437206919}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9946768136379429, "res": {"Yes": 0.9946768136379429, "No": 0.00532318218168279}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9868797725116348, "res": {"Yes": 0.9868797725116348, "No": 0.013120137701239088}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9861769391582864, "res": {"Yes": 0.9861769391582864, "No": 0.013822950340960555}, "ground_truth": 1}, {"key": "30501550", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974103433610442, "res": {"Yes": 0.9974103433610442, "No": 0.0025896876733042568}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9858749085738826, "res": {"Yes": 0.9858749085738826, "No": 0.014124952002620102}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9809919258310829, "res": {"Yes": 0.9809919258310829, "No": 0.019008031093347998}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990061492970758, "res": {"Yes": 0.9990061492970758, "No": 0.0009937670846926188}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997191578835323, "res": {"Yes": 0.9997191578835323, "No": 0.00028071574058053934}, "ground_truth": 1}, {"key": "38755897", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999674242730425, "res": {"Yes": 0.999674242730425, "No": 0.0003257007639895518}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996248088718013, "res": {"Yes": 0.9996248088718013, "No": 0.0003751054768287331}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9886008374525871, "res": {"Yes": 0.9886008374525871, "No": 0.011399047268644725}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9929802448954159, "res": {"Yes": 0.9929802448954159, "No": 0.007019742798541646}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9932581950290958, "res": {"Yes": 0.9932581950290958, "No": 0.006741797487993472}, "ground_truth": 1}, {"key": "35507201", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.991189892455268, "res": {"Yes": 0.991189892455268, "No": 0.00880999858189008}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.982626309710504, "res": {"Yes": 0.982626309710504, "No": 0.017373648060955555}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8318202559685047, "res": {"Yes": 0.8318202559685047, "No": 0.16817972159125508}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9229544598228773, "res": {"Yes": 0.9229544598228773, "No": 0.07704541698780026}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995612020000143, "res": {"Yes": 0.9995612020000143, "No": 0.0004386823425891566}, "ground_truth": 1}, {"key": "36453511", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970244756736635, "res": {"Yes": 0.9970244756736635, "No": 0.0029754946050644473}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9539397601732795, "res": {"Yes": 0.9539397601732795, "No": 0.04606021116668093}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7184290900375482, "res": {"Yes": 0.7184290900375482, "No": 0.2815706460530592}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9508913605771417, "res": {"Yes": 0.9508913605771417, "No": 0.04910850702619472}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9705828108369076, "res": {"Yes": 0.9705828108369076, "No": 0.02941703943222887}, "ground_truth": 1}, {"key": "38066835", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9914436358407346, "res": {"Yes": 0.9914436358407346, "No": 0.008556329307628596}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9839287835402559, "res": {"Yes": 0.9839287835402559, "No": 0.016071132962605775}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8929706548105066, "res": {"Yes": 0.8929706548105066, "No": 0.10702915553511726}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977987594911097, "res": {"Yes": 0.9977987594911097, "No": 0.0022012805935237765}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961659630603903, "res": {"Yes": 0.9961659630603903, "No": 0.0038339855625583795}, "ground_truth": 1}, {"key": "39697181", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948517665138155, "res": {"Yes": 0.9948517665138155, "No": 0.005148238302565406}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9795086525470085, "res": {"Yes": 0.9795086525470085, "No": 0.020491387562805004}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6254075448450912, "res": {"Yes": 0.6254075448450912, "No": 0.37459216681086727}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979495081366435, "res": {"Yes": 0.9979495081366435, "No": 0.002050462298074106}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967051007336695, "res": {"Yes": 0.9967051007336695, "No": 0.0032948662620503216}, "ground_truth": 1}, {"key": "21820893", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980184903874194, "res": {"Yes": 0.9980184903874194, "No": 0.001981442032726804}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9801972186930035, "res": {"Yes": 0.9801972186930035, "No": 0.019802774991817537}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.024693480933540082, "res": {"No": 0.9753064018942758, "Yes": 0.024693480933540082}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.6343004751187115e-07}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9690561397607257, "res": {"Yes": 0.9690561397607257, "No": 0.030943710975454803}, "ground_truth": 1}, {"key": "40519933", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9571000237172027, "res": {"Yes": 0.9571000237172027, "No": 0.04289944028941757}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9570182353581604, "res": {"Yes": 0.9570182353581604, "No": 0.042980945044648534}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8360886925146853, "res": {"Yes": 0.8360886925146853, "No": 0.16391121003412354}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999361749668451, "res": {"Yes": 0.999361749668451, "No": 0.0006381586741355284}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993135371934755, "res": {"Yes": 0.9993135371934755, "No": 0.0006864366614592457}, "ground_truth": 1}, {"key": "30446033", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997724100525223, "res": {"Yes": 0.9997724100525223, "No": 0.00022752487156236815}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979168644448096, "res": {"Yes": 0.9979168644448096, "No": 0.002083113769699813}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.029626773984094818, "res": {"No": 0.9703730808488478, "Yes": 0.029626773984094818}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955099676707371, "res": {"Yes": 0.9955099676707371, "No": 0.0044900052857567135}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937022481939602, "res": {"Yes": 0.9937022481939602, "No": 0.0062977276570868115}, "ground_truth": 1}, {"key": "40216291", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9928815162886437, "res": {"Yes": 0.9928815162886437, "No": 0.007118461915489584}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.6927441222067873, "res": {"Yes": 0.6927441222067873, "No": 0.3072556321088034}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.964759343228756, "res": {"Yes": 0.964759343228756, "No": 0.03524029876296138}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986802753035727, "res": {"Yes": 0.9986802753035727, "No": 0.001319728031968197}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974150808719442, "res": {"Yes": 0.9974150808719442, "No": 0.0025848939422669455}, "ground_truth": 1}, {"key": "33479118", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993106782514733, "res": {"Yes": 0.9993106782514733, "No": 0.0006892896385703667}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992322388795101, "res": {"Yes": 0.9992322388795101, "No": 0.0007677076970989103}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.13160725926817424, "res": {"No": 0.8683925239574762, "Yes": 0.13160725926817424}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9751991435944375, "res": {"Yes": 0.9751991435944375, "No": 0.024800739691148665}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8840603884070853, "res": {"Yes": 0.8840603884070853, "No": 0.11593914935952015}, "ground_truth": 1}, {"key": "22297373", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9825541492666731, "res": {"Yes": 0.9825541492666731, "No": 0.01744581477629726}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9870022745921438, "res": {"Yes": 0.9870022745921438, "No": 0.012997664389603555}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9165826552571688, "res": {"Yes": 0.9165826552571688, "No": 0.08341723802732104}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.89518738849729, "res": {"Yes": 0.89518738849729, "No": 0.10481256701699863}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995668896100385, "res": {"Yes": 0.995668896100385, "No": 0.004331123074691406}, "ground_truth": 1}, {"key": "36463668", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9869894925019542, "res": {"Yes": 0.9869894925019542, "No": 0.013010442171328853}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9898734582229392, "res": {"Yes": 0.9898734582229392, "No": 0.010126471937626275}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8032068754573078, "res": {"Yes": 0.8032068754573078, "No": 0.19679303229418083}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998443879188161, "res": {"Yes": 0.9998443879188161, "No": 0.00015557276401577985}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997998173492212, "res": {"Yes": 0.9997998173492212, "No": 0.00020014469059644}, "ground_truth": 1}, {"key": "35264615", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999211350800014, "res": {"Yes": 0.9999211350800014, "No": 7.880737295962047e-05}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975762706865732, "res": {"Yes": 0.9975762706865732, "No": 0.002423752063599595}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5145429953241822, "res": {"Yes": 0.5145429953241822, "No": 0.48545652117422644}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9792409678667996, "res": {"Yes": 0.9792409678667996, "No": 0.020758603328393244}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9949027319860412, "res": {"Yes": 0.9949027319860412, "No": 0.005097245414348726}, "ground_truth": 1}, {"key": "39898482", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9916805130782619, "res": {"Yes": 0.9916805130782619, "No": 0.00831936540912213}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998087354841389, "res": {"Yes": 0.998087354841389, "No": 0.0019126503136107683}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9899606060637661, "res": {"Yes": 0.9899606060637661, "No": 0.01003931494114714}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9928415714830154, "res": {"Yes": 0.9928415714830154, "No": 0.007158369839351072}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991322665985597, "res": {"Yes": 0.9991322665985597, "No": 0.000867672342646312}, "ground_truth": 1}, {"key": "37228721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996290986009366, "res": {"Yes": 0.9996290986009366, "No": 0.00037083597834562855}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9929640335329742, "res": {"Yes": 0.9929640335329742, "No": 0.007035925785415029}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.829832894822969, "res": {"Yes": 0.829832894822969, "No": 0.17016667177941383}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9808085308395261, "res": {"Yes": 0.9808085308395261, "No": 0.019191342751365834}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990669413302035, "res": {"Yes": 0.9990669413302035, "No": 0.0009329807403888835}, "ground_truth": 1}, {"key": "24535799", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988561515938827, "res": {"Yes": 0.9988561515938827, "No": 0.001143843724287029}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986271398359012, "res": {"Yes": 0.9986271398359012, "No": 0.0013728811750076196}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.1965121927459712, "res": {"No": 0.8034877015660932, "Yes": 0.1965121927459712}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978666500326502, "res": {"Yes": 0.9978666500326502, "No": 0.002133333071540539}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99959109846303, "res": {"Yes": 0.99959109846303, "No": 0.00040879040111046566}, "ground_truth": 1}, {"key": "35177759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987906171935388, "res": {"Yes": 0.9987906171935388, "No": 0.00120934667888084}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.994257115767658, "res": {"Yes": 0.994257115767658, "No": 0.0057428452690096304}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.04626663518579834, "res": {"No": 0.9537331487058548, "Yes": 0.04626663518579834}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989281092856982, "res": {"Yes": 0.9989281092856982, "No": 0.0010718715995998297}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9426631416973704, "res": {"Yes": 0.9426631416973704, "No": 0.05733657223279272}, "ground_truth": 1}, {"key": "34364829", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9897125238217114, "res": {"Yes": 0.9897125238217114, "No": 0.010287407546436377}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9046199936344197, "res": {"Yes": 0.9046199936344197, "No": 0.09537972016617866}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9978907494019744, "res": {"Yes": 0.9978907494019744, "No": 0.0021091737412730054}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998104244117232, "res": {"Yes": 0.9998104244117232, "No": 0.00018945045856421622}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999547484278832, "res": {"Yes": 0.9999547484278832, "No": 4.5152473299013484e-05}, "ground_truth": 1}, {"key": "38090732", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998067298090901, "res": {"Yes": 0.9998067298090901, "No": 0.00019324147216166158}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987209385972199, "res": {"Yes": 0.9987209385972199, "No": 0.0012790444541894964}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.1589530346428135, "res": {"No": 0.8410468021189617, "Yes": 0.1589530346428135}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994826054947233, "res": {"Yes": 0.9994826054947233, "No": 0.0005173582414062957}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996806122612555, "res": {"Yes": 0.996806122612555, "No": 0.0031938895827392944}, "ground_truth": 1}, {"key": "30651479", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994549795776181, "res": {"Yes": 0.9994549795776181, "No": 0.0005449233583884994}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9966902967827501, "res": {"Yes": 0.9966902967827501, "No": 0.003309492581450643}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6010346813732297, "res": {"Yes": 0.6010346813732297, "No": 0.39896513773793507}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.935977491814888, "res": {"Yes": 0.935977491814888, "No": 0.06402244205365078}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897559622678974, "res": {"Yes": 0.9897559622678974, "No": 0.010243919181764734}, "ground_truth": 1}, {"key": "39380921", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929011477360596, "res": {"Yes": 0.9929011477360596, "No": 0.007098808683994782}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9917989337432744, "res": {"Yes": 0.9917989337432744, "No": 0.00820096047888463}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7523304675752677, "res": {"Yes": 0.7523304675752677, "No": 0.24766957006329754}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992677129531263, "res": {"Yes": 0.9992677129531263, "No": 0.0007321925735367153}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998101860569464, "res": {"Yes": 0.9998101860569464, "No": 0.0001897853236729017}, "ground_truth": 1}, {"key": "39037490", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998772798807299, "res": {"Yes": 0.9998772798807299, "No": 0.00012269389045866682}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985845756592694, "res": {"Yes": 0.9985845756592694, "No": 0.0014153964647918087}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.17170477522861585, "res": {"No": 0.8282952384300162, "Yes": 0.17170477522861585}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9574991937157636, "res": {"Yes": 0.9574991937157636, "No": 0.04250074813628477}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977270789059336, "res": {"Yes": 0.9977270789059336, "No": 0.002272891885307271}, "ground_truth": 1}, {"key": "35917499", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9927698901150384, "res": {"Yes": 0.9927698901150384, "No": 0.007230023445564897}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9467607818399632, "res": {"Yes": 0.9467607818399632, "No": 0.053239072857709145}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9721575200373753, "res": {"Yes": 0.9721575200373753, "No": 0.027842356575216165}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999512916842885, "res": {"Yes": 0.9999512916842885, "No": 4.864021014015539e-05}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989724816588654, "res": {"Yes": 0.9989724816588654, "No": 0.0010274235687555507}, "ground_truth": 1}, {"key": "34908073", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999346041184038, "res": {"Yes": 0.9999346041184038, "No": 6.53726227192138e-05}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999910407653634, "res": {"Yes": 0.999910407653634, "No": 8.948325904106194e-05}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.789609349600672, "res": {"Yes": 0.789609349600672, "No": 0.21039017415359423}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998426001486566, "res": {"Yes": 0.9998426001486566, "No": 0.0001573846104921948}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994208020399346, "res": {"Yes": 0.9994208020399346, "No": 0.0005791613784320084}, "ground_truth": 1}, {"key": "36344759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991517808913819, "res": {"Yes": 0.9991517808913819, "No": 0.0008481033963051936}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995976520236648, "res": {"Yes": 0.9995976520236648, "No": 0.00040230341111959357}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9858517407857537, "res": {"Yes": 0.9858517407857537, "No": 0.014148240626121434}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.05518221884305934, "res": {"No": 0.9448176784117113, "Yes": 0.05518221884305934}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998073257140748, "res": {"Yes": 0.9998073257140748, "No": 0.0001926433511645012}, "ground_truth": 1}, {"key": "39984637", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998832320179115, "res": {"Yes": 0.9998832320179115, "No": 0.00011669125000704191}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.978623149181191, "res": {"Yes": 0.978623149181191, "No": 0.021376786258537873}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9809295288811583, "res": {"Yes": 0.9809295288811583, "No": 0.019070512215061123}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9804904022714688, "res": {"Yes": 0.9804904022714688, "No": 0.019509403478131824}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9792593767907887, "res": {"Yes": 0.9792593767907887, "No": 0.0207405511973132}, "ground_truth": 1}, {"key": "17917326", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910936248386099, "res": {"Yes": 0.9910936248386099, "No": 0.00890628563854836}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9846689949717709, "res": {"Yes": 0.9846689949717709, "No": 0.015330928813024992}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0915345575372787, "res": {"No": 0.9084652515806082, "Yes": 0.0915345575372787}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994377120131742, "res": {"Yes": 0.9994377120131742, "No": 0.0005622733716016882}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9869308616391724, "res": {"Yes": 0.9869308616391724, "No": 0.013068807434503135}, "ground_truth": 1}, {"key": "32193638", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991526146339333, "res": {"Yes": 0.9991526146339333, "No": 0.0008473152809898195}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7191059317895347, "res": {"Yes": 0.7191059317895347, "No": 0.28089352388918176}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8764098256040662, "res": {"Yes": 0.8764098256040662, "No": 0.12358975818163169}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9829109214651264, "res": {"Yes": 0.9829109214651264, "No": 0.01708902216987083}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.987542108102017, "res": {"Yes": 0.987542108102017, "No": 0.012457763202457492}, "ground_truth": 1}, {"key": "34564692", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9863964560013738, "res": {"Yes": 0.9863964560013738, "No": 0.01360341656194449}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.36944115759622526, "res": {"No": 0.6305585910376034, "Yes": 0.36944115759622526}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8486786486795008, "res": {"Yes": 0.8486786486795008, "No": 0.15132095017414632}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961887938520196, "res": {"Yes": 0.9961887938520196, "No": 0.0038111206470812253}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999546292272846, "res": {"Yes": 0.9999546292272846, "No": 4.532050404298142e-05}, "ground_truth": 1}, {"key": "39329284", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999583118759142, "res": {"Yes": 0.999583118759142, "No": 0.0004168275378541377}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.6839017582257836, "res": {"Yes": 0.6839017582257836, "No": 0.31609774297837245}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.3400415714076931, "res": {"No": 0.6599583183309379, "Yes": 0.3400415714076931}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9799495306307837, "res": {"Yes": 0.9799495306307837, "No": 0.020050483716687523}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9882475983630477, "res": {"Yes": 0.9882475983630477, "No": 0.011752338933866732}, "ground_truth": 1}, {"key": "37438541", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985949207494837, "res": {"Yes": 0.9985949207494837, "No": 0.0014050709208458737}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9073105091412489, "res": {"Yes": 0.9073105091412489, "No": 0.09268902556390161}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.00043069989917495486, "res": {"No": 0.9995691852073547, "Yes": 0.00043069989917495486}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9879595315922721, "res": {"Yes": 0.9879595315922721, "No": 0.012040063078946837}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9799426641484793, "res": {"Yes": 0.9799426641484793, "No": 0.02005707138255333}, "ground_truth": 1}, {"key": "34652757", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9812451839244843, "res": {"Yes": 0.9812451839244843, "No": 0.018754694210625495}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9871954419412994, "res": {"Yes": 0.9871954419412994, "No": 0.012804320832240867}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9961864294007234, "res": {"Yes": 0.9961864294007234, "No": 0.0038135414772243474}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999684963934286, "res": {"Yes": 0.999684963934286, "No": 0.0003149139912728748}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986121593428141, "res": {"Yes": 0.9986121593428141, "No": 0.0013878145990372573}, "ground_truth": 1}, {"key": "31361004", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997856387386591, "res": {"Yes": 0.9997856387386591, "No": 0.00021423650277937158}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9795215792079925, "res": {"Yes": 0.9795215792079925, "No": 0.020478142114321873}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9317290550756806, "res": {"Yes": 0.9317290550756806, "No": 0.06827080883716286}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9794159004078823, "res": {"Yes": 0.9794159004078823, "No": 0.020584084476964797}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9949073318299657, "res": {"Yes": 0.9949073318299657, "No": 0.005092624308746095}, "ground_truth": 1}, {"key": "26150727", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981128836473775, "res": {"Yes": 0.9981128836473775, "No": 0.00188712324889294}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965148253689158, "res": {"Yes": 0.9965148253689158, "No": 0.003485171587396227}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7041341901355949, "res": {"Yes": 0.7041341901355949, "No": 0.2958650620653141}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9873612464533125, "res": {"Yes": 0.9873612464533125, "No": 0.012638557735219493}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9651731632103097, "res": {"Yes": 0.9651731632103097, "No": 0.03482672942862979}, "ground_truth": 1}, {"key": "36997402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9718304566992971, "res": {"Yes": 0.9718304566992971, "No": 0.0281694534254365}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9073184481429369, "res": {"Yes": 0.9073184481429369, "No": 0.09268098050564327}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.012448286451021695, "res": {"No": 0.9875515302862189, "Yes": 0.012448286451021695}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993619879163205, "res": {"Yes": 0.9993619879163205, "No": 0.000637954559825497}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998019625919968, "res": {"Yes": 0.9998019625919968, "No": 0.0001980163410736912}, "ground_truth": 1}, {"key": "37430643", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995540566323144, "res": {"Yes": 0.9995540566323144, "No": 0.0004458746496695593}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.926706181470558, "res": {"Yes": 0.926706181470558, "No": 0.0732936356438749}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.10235863531390181, "res": {"No": 0.8976410250397232, "Yes": 0.10235863531390181}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9894782202953498, "res": {"Yes": 0.9894782202953498, "No": 0.010521604124284118}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9914457446435907, "res": {"Yes": 0.9914457446435907, "No": 0.008554157986802592}, "ground_truth": 1}, {"key": "36964631", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971595811447602, "res": {"Yes": 0.9971595811447602, "No": 0.0028403244226661534}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987478152384152, "res": {"Yes": 0.9987478152384152, "No": 0.0012520300562536646}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9292232563884327, "res": {"Yes": 0.9292232563884327, "No": 0.07077627027997935}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9140503331064274, "res": {"Yes": 0.9140503331064274, "No": 0.08594928207582267}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944986412037539, "res": {"Yes": 0.9944986412037539, "No": 0.00550136354608593}, "ground_truth": 1}, {"key": "35502013", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989041972308226, "res": {"Yes": 0.9989041972308226, "No": 0.0010957874957440866}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.4118193280347341, "res": {"No": 0.5881805031767047, "Yes": 0.4118193280347341}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.17672703710758247, "res": {"No": 0.8232725801848125, "Yes": 0.17672703710758247}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979340727713825, "res": {"Yes": 0.9979340727713825, "No": 0.002065858294376142}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9605269220142651, "res": {"Yes": 0.9605269220142651, "No": 0.03947289678225747}, "ground_truth": 1}, {"key": "33987664", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9942915305848027, "res": {"Yes": 0.9942915305848027, "No": 0.005708140300754205}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.975547994726091, "res": {"Yes": 0.975547994726091, "No": 0.02445154194046694}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9338210821226003, "res": {"Yes": 0.9338210821226003, "No": 0.06617839690743658}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969481664403856, "res": {"Yes": 0.9969481664403856, "No": 0.0030517999954425617}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.990354827947086, "res": {"Yes": 0.990354827947086, "No": 0.009645047575280103}, "ground_truth": 1}, {"key": "35203721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910143613313391, "res": {"Yes": 0.9910143613313391, "No": 0.008985599847360635}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9869731239104487, "res": {"Yes": 0.9869731239104487, "No": 0.013026814396100648}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9987055994599925, "res": {"Yes": 0.9987055994599925, "No": 0.0012943336140751906}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997148677379322, "res": {"Yes": 0.9997148677379322, "No": 0.0002850405805320654}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998783452545388, "res": {"Yes": 0.9998783452545388, "No": 0.00012162962396123702}, "ground_truth": 1}, {"key": "39028348", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999357960811598, "res": {"Yes": 0.9999357960811598, "No": 6.409315410341117e-05}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988103628799458, "res": {"Yes": 0.9988103628799458, "No": 0.00118958151711227}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9615574859125084, "res": {"Yes": 0.9615574859125084, "No": 0.038442344714696014}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9675980302066909, "res": {"Yes": 0.9675980302066909, "No": 0.03240192967781841}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9535331355986193, "res": {"Yes": 0.9535331355986193, "No": 0.04646667918124867}, "ground_truth": 1}, {"key": "37459383", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956993899229465, "res": {"Yes": 0.9956993899229465, "No": 0.004300585941933494}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9387314503929569, "res": {"Yes": 0.9387314503929569, "No": 0.06126828766495311}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9959106297401484, "res": {"Yes": 0.9959106297401484, "No": 0.004089310111703808}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9916791058846122, "res": {"Yes": 0.9916791058846122, "No": 0.00832081310464918}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972216931579564, "res": {"Yes": 0.9972216931579564, "No": 0.0027783126388486808}, "ground_truth": 1}, {"key": "34020070", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.990809385265607, "res": {"Yes": 0.990809385265607, "No": 0.009190576622716858}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9754366003342801, "res": {"Yes": 0.9754366003342801, "No": 0.02456326610042166}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.977948541876278, "res": {"Yes": 0.977948541876278, "No": 0.022051419560511765}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995423872866515, "res": {"Yes": 0.9995423872866515, "No": 0.0004575201639556215}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999806147848957, "res": {"Yes": 0.9999806147848957, "No": 1.9305915624042144e-05}, "ground_truth": 1}, {"key": "35176615", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998428385111608, "res": {"Yes": 0.9998428385111608, "No": 0.00015713393926111778}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996226640291312, "res": {"Yes": 0.9996226640291312, "No": 0.0003773263858866247}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9726957795676844, "res": {"Yes": 0.9726957795676844, "No": 0.027304050986929177}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9913903253668663, "res": {"Yes": 0.9913903253668663, "No": 0.00860966043296712}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999704827216435, "res": {"Yes": 0.9999704827216435, "No": 2.9412553057574058e-05}, "ground_truth": 1}, {"key": "33296389", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999166057074973, "res": {"Yes": 0.9999166057074973, "No": 8.329558594928403e-05}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962322076098031, "res": {"Yes": 0.9962322076098031, "No": 0.0037677604467211082}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9852988335569391, "res": {"Yes": 0.9852988335569391, "No": 0.014701069981341394}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983334672105382, "res": {"Yes": 0.9983334672105382, "No": 0.0016664789359338867}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.992355424541165, "res": {"Yes": 0.992355424541165, "No": 0.007644575252550177}, "ground_truth": 1}, {"key": "35399504", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9811864208725278, "res": {"Yes": 0.9811864208725278, "No": 0.018813541046202756}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9889291459886567, "res": {"Yes": 0.9889291459886567, "No": 0.01107078081967198}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9472705452070787, "res": {"Yes": 0.9472705452070787, "No": 0.05272935276641074}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994993973733282, "res": {"Yes": 0.9994993973733282, "No": 0.0005005605437456917}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998379519412053, "res": {"Yes": 0.9998379519412053, "No": 0.0001619794633843818}, "ground_truth": 1}, {"key": "34807886", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997535913236059, "res": {"Yes": 0.9997535913236059, "No": 0.0002463005548745706}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982089738050612, "res": {"Yes": 0.9982089738050612, "No": 0.0017909657132514753}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.19962168075232006, "res": {"No": 0.800378210425154, "Yes": 0.19962168075232006}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9824335510941964, "res": {"Yes": 0.9824335510941964, "No": 0.017566412164451747}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964548133661179, "res": {"Yes": 0.9964548133661179, "No": 0.003545140097616314}, "ground_truth": 1}, {"key": "37629813", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965102147047881, "res": {"Yes": 0.9965102147047881, "No": 0.003489760045509024}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983082824699978, "res": {"Yes": 0.9983082824699978, "No": 0.0016916658931055634}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.4750106757200282, "res": {"No": 0.524989193716236, "Yes": 0.4750106757200282}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996717402690188, "res": {"Yes": 0.9996717402690188, "No": 0.0003282247404214247}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970139307997625, "res": {"Yes": 0.9970139307997625, "No": 0.002986093104528789}, "ground_truth": 1}, {"key": "28084389", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996318357924511, "res": {"Yes": 0.996318357924511, "No": 0.003681656471329064}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996128967438067, "res": {"Yes": 0.9996128967438067, "No": 0.0003870906711497694}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8266464949988515, "res": {"Yes": 0.8266464949988515, "No": 0.17335319085700568}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.6926076927604903, "res": {"Yes": 0.6926076927604903, "No": 0.30739202242110497}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9850868727635743, "res": {"Yes": 0.9850868727635743, "No": 0.014913084455562988}, "ground_truth": 1}, {"key": "35391734", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.981996760021718, "res": {"Yes": 0.981996760021718, "No": 0.018003195663187914}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9311179388270187, "res": {"Yes": 0.9311179388270187, "No": 0.06888189552534137}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5245258328621082, "res": {"Yes": 0.5245258328621082, "No": 0.4754739181122685}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9903001173058746, "res": {"Yes": 0.9903001173058746, "No": 0.00969986316765128}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995240914903544, "res": {"Yes": 0.995240914903544, "No": 0.00475913809316012}, "ground_truth": 1}, {"key": "40214591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.943970523905985, "res": {"Yes": 0.943970523905985, "No": 0.05602938464857809}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991598762717193, "res": {"Yes": 0.9991598762717193, "No": 0.0008401248888297481}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.005258356781466395, "res": {"No": 0.9947415781589558, "Yes": 0.005258356781466395}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.7992908675302581, "res": {"Yes": 0.7992908675302581, "No": 0.20070871522141476}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.991398174235142, "res": {"Yes": 0.991398174235142, "No": 0.008601752649782678}, "ground_truth": 1}, {"key": "26283171", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963569298411501, "res": {"Yes": 0.9963569298411501, "No": 0.00364300167926204}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9680491405142488, "res": {"Yes": 0.9680491405142488, "No": 0.03195038973354406}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9928788201225469, "res": {"Yes": 0.9928788201225469, "No": 0.007121128990835148}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949216162150348, "res": {"Yes": 0.9949216162150348, "No": 0.005078400140327284}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997456066634737, "res": {"Yes": 0.9997456066634737, "No": 0.00025438059676115526}, "ground_truth": 1}, {"key": "37084030", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984286445737721, "res": {"Yes": 0.9984286445737721, "No": 0.0015713753877042692}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989130014114352, "res": {"Yes": 0.9989130014114352, "No": 0.001086908772834774}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9695679888616398, "res": {"Yes": 0.9695679888616398, "No": 0.03043184457258311}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.995880240588484, "res": {"Yes": 0.995880240588484, "No": 0.0041197788262941305}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991114462578782, "res": {"Yes": 0.9991114462578782, "No": 0.0008885319069362525}, "ground_truth": 1}, {"key": "39027295", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969074105370063, "res": {"Yes": 0.9969074105370063, "No": 0.0030926096425035287}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957095538750547, "res": {"Yes": 0.9957095538750547, "No": 0.004290452816417158}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9046079984528324, "res": {"Yes": 0.9046079984528324, "No": 0.09539191047335993}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995654914661867, "res": {"Yes": 0.9995654914661867, "No": 0.00043443918001805537}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998530848030946, "res": {"Yes": 0.9998530848030946, "No": 0.00014687098641138666}, "ground_truth": 1}, {"key": "14018647", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974104620528821, "res": {"Yes": 0.9974104620528821, "No": 0.0025895297071742}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997971954076322, "res": {"Yes": 0.9997971954076322, "No": 0.00020271368362527884}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.41549612119540147, "res": {"No": 0.5845035058824374, "Yes": 0.41549612119540147}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.6663370602459371, "res": {"Yes": 0.6663370602459371, "No": 0.3336624919672367}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9631867738402536, "res": {"Yes": 0.9631867738402536, "No": 0.03681284300465581}, "ground_truth": 1}, {"key": "37424289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9834040850563114, "res": {"Yes": 0.9834040850563114, "No": 0.016595538247808923}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9312582036859267, "res": {"Yes": 0.9312582036859267, "No": 0.06874133408865093}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.823684113480077, "res": {"Yes": 0.823684113480077, "No": 0.1763156014029271}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9931695298344416, "res": {"Yes": 0.9931695298344416, "No": 0.0068304460504427554}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990092418252982, "res": {"Yes": 0.9990092418252982, "No": 0.000990677604474364}, "ground_truth": 1}, {"key": "37498031", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956506947423633, "res": {"Yes": 0.9956506947423633, "No": 0.004349323340018766}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9372688233239092, "res": {"Yes": 0.9372688233239092, "No": 0.06273108378763592}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 3.408152872687371e-05, "res": {"No": 0.9999658339276736, "Yes": 3.408152872687371e-05}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986774217784988, "res": {"Yes": 0.9986774217784988, "No": 0.0013225515708770589}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995002313659682, "res": {"Yes": 0.9995002313659682, "No": 0.0004996758348994948}, "ground_truth": 1}, {"key": "30104095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998642883696566, "res": {"Yes": 0.9998642883696566, "No": 0.0001355807709048456}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956005966520086, "res": {"Yes": 0.9956005966520086, "No": 0.004399445802359494}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9208885222085881, "res": {"Yes": 0.9208885222085881, "No": 0.07911101973042423}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9901163643631503, "res": {"Yes": 0.9901163643631503, "No": 0.00988352845406708}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990093609072068, "res": {"Yes": 0.9990093609072068, "No": 0.000990647290203478}, "ground_truth": 1}, {"key": "37911407", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9936435167607893, "res": {"Yes": 0.9936435167607893, "No": 0.006356438771779636}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962731551890072, "res": {"Yes": 0.9962731551890072, "No": 0.003726790021088973}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9746248044438262, "res": {"Yes": 0.9746248044438262, "No": 0.025374691482987752}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984644180143827, "res": {"Yes": 0.9984644180143827, "No": 0.0015355625238899736}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988142847165352, "res": {"Yes": 0.9988142847165352, "No": 0.0011857078985671532}, "ground_truth": 1}, {"key": "39177472", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972833477963889, "res": {"Yes": 0.9972833477963889, "No": 0.002716649389801411}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982045747078071, "res": {"Yes": 0.9982045747078071, "No": 0.0017953691923524042}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.4008399333771944, "res": {"No": 0.5991597631410855, "Yes": 0.4008399333771944}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9913209592212477, "res": {"Yes": 0.9913209592212477, "No": 0.008678961502818916}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983739865945634, "res": {"Yes": 0.9983739865945634, "No": 0.0016259240153101758}, "ground_truth": 1}, {"key": "32325454", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993321056407993, "res": {"Yes": 0.9993321056407993, "No": 0.000667859724328209}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977661207303795, "res": {"Yes": 0.9977661207303795, "No": 0.0022339119337365463}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9784254559599664, "res": {"Yes": 0.9784254559599664, "No": 0.021574465853035057}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991378643526775, "res": {"Yes": 0.9991378643526775, "No": 0.0008621343853273117}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994172316754619, "res": {"Yes": 0.9994172316754619, "No": 0.0005827338464220989}, "ground_truth": 1}, {"key": "38395319", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997681196883961, "res": {"Yes": 0.9997681196883961, "No": 0.0002317920324891231}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999774078524101, "res": {"Yes": 0.999774078524101, "No": 0.00022579162438630154}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9828569119939157, "res": {"Yes": 0.9828569119939157, "No": 0.017142868890096672}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978527621206191, "res": {"Yes": 0.9978527621206191, "No": 0.002147215698637923}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979705237774348, "res": {"Yes": 0.9979705237774348, "No": 0.0020294022244657086}, "ground_truth": 1}, {"key": "38235895", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985528341558492, "res": {"Yes": 0.9985528341558492, "No": 0.0014471256503936641}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9947817983635301, "res": {"Yes": 0.9947817983635301, "No": 0.005218145022067574}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.3113270224904929, "res": {"No": 0.6886728082031814, "Yes": 0.3113270224904929}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9851858952248327, "res": {"Yes": 0.9851858952248327, "No": 0.014814106652162783}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983743436131649, "res": {"Yes": 0.9983743436131649, "No": 0.001625589922254267}, "ground_truth": 1}, {"key": "26543267", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993962752751542, "res": {"Yes": 0.9993962752751542, "No": 0.0006036807858399093}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9960563218043312, "res": {"Yes": 0.9960563218043312, "No": 0.00394372725123291}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.890652828480567, "res": {"Yes": 0.890652828480567, "No": 0.10934669022891161}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9943369041254754, "res": {"Yes": 0.9943369041254754, "No": 0.005663048089553392}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981390191761634, "res": {"Yes": 0.9981390191761634, "No": 0.0018609224003781975}, "ground_truth": 1}, {"key": "39054728", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990042474908796, "res": {"Yes": 0.9990042474908796, "No": 0.0009956853141010879}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948153051683476, "res": {"Yes": 0.9948153051683476, "No": 0.005184670437877891}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9967040313693406, "res": {"Yes": 0.9967040313693406, "No": 0.003296018678270393}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994280693147415, "res": {"Yes": 0.9994280693147415, "No": 0.0005719029207253518}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999364370498076, "res": {"Yes": 0.999364370498076, "No": 0.0006355998241305933}, "ground_truth": 1}, {"key": "39158443", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998864501472726, "res": {"Yes": 0.9998864501472726, "No": 0.00011344357286953472}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998248417619957, "res": {"Yes": 0.9998248417619957, "No": 0.0001750436826581534}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9939538728359221, "res": {"Yes": 0.9939538728359221, "No": 0.0060460627736758435}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999244725263433, "res": {"Yes": 0.9999244725263433, "No": 7.540989938425346e-05}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999670259236568, "res": {"Yes": 0.9999670259236568, "No": 3.28737201178708e-05}, "ground_truth": 1}, {"key": "36254201", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991325047917204, "res": {"Yes": 0.9991325047917204, "No": 0.0008674974375330627}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996229023192102, "res": {"Yes": 0.9996229023192102, "No": 0.000377093270220307}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 8.537221904528651e-07, "res": {"No": 0.999999091165773, "Yes": 8.537221904528651e-07}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976014223953652, "res": {"Yes": 0.9976014223953652, "No": 0.0023985101625191687}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978232030662791, "res": {"Yes": 0.9978232030662791, "No": 0.0021767426866885512}, "ground_truth": 1}, {"key": "23434347", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985562860591081, "res": {"Yes": 0.9985562860591081, "No": 0.0014437283203712328}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9427582800622316, "res": {"Yes": 0.9427582800622316, "No": 0.057241604133059336}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.12476472690192267, "res": {"No": 0.8752352076537305, "Yes": 0.12476472690192267}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949127617505281, "res": {"Yes": 0.9949127617505281, "No": 0.005087194902684499}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9771211593607129, "res": {"Yes": 0.9771211593607129, "No": 0.02287874420778993}, "ground_truth": 1}, {"key": "34397620", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.944689251460146, "res": {"Yes": 0.944689251460146, "No": 0.05531021875013027}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.48049464272961034, "res": {"No": 0.5195049032942028, "Yes": 0.48049464272961034}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8508257929511347, "res": {"Yes": 0.8508257929511347, "No": 0.14917418932170295}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9901284017760223, "res": {"Yes": 0.9901284017760223, "No": 0.009871480863128555}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996996178037684, "res": {"Yes": 0.9996996178037684, "No": 0.000300367750208749}, "ground_truth": 1}, {"key": "34340916", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.7704960006467597e-06}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999657147257535, "res": {"Yes": 0.9999657147257535, "No": 3.419014887428777e-05}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9476414391674118, "res": {"Yes": 0.9476414391674118, "No": 0.05235846262513169}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9731956926971512, "res": {"Yes": 0.9731956926971512, "No": 0.026804218423233935}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991448876471261, "res": {"Yes": 0.9991448876471261, "No": 0.0008550430837039885}, "ground_truth": 1}, {"key": "30375089", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973774905020355, "res": {"Yes": 0.9973774905020355, "No": 0.002622478279741995}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972976918259352, "res": {"Yes": 0.9972976918259352, "No": 0.0027023208530266183}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0904617020148001, "res": {"No": 0.9095378423755406, "Yes": 0.0904617020148001}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9741906220157028, "res": {"Yes": 0.9741906220157028, "No": 0.02580933321375734}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9845107674398421, "res": {"Yes": 0.9845107674398421, "No": 0.015488930058285319}, "ground_truth": 1}, {"key": "35807797", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9050785259870163, "res": {"Yes": 0.9050785259870163, "No": 0.09492085878447627}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9548933596732969, "res": {"Yes": 0.9548933596732969, "No": 0.04510628433235067}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8171499447958566, "res": {"Yes": 0.8171499447958566, "No": 0.18284971372784076}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997375065874259, "res": {"Yes": 0.9997375065874259, "No": 0.00026244553058435926}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.931779956824328, "res": {"Yes": 0.931779956824328, "No": 0.06821986369600566}, "ground_truth": 1}, {"key": "34188172", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992508095833891, "res": {"Yes": 0.9992508095833891, "No": 0.0007491542907502747}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971390798542408, "res": {"Yes": 0.9971390798542408, "No": 0.0028609172179170812}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9215907555140999, "res": {"Yes": 0.9215907555140999, "No": 0.07840912249815919}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9779032785755313, "res": {"Yes": 0.9779032785755313, "No": 0.022096713321550453}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.977190370326352, "res": {"Yes": 0.977190370326352, "No": 0.02280960408310006}, "ground_truth": 1}, {"key": "37075567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981222720411265, "res": {"Yes": 0.9981222720411265, "No": 0.00187768320473493}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7513780187474036, "res": {"Yes": 0.7513780187474036, "No": 0.2486217534282114}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8091248560699957, "res": {"Yes": 0.8091248560699957, "No": 0.19087471663545794}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992846056166711, "res": {"Yes": 0.9992846056166711, "No": 0.0007153003739946568}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995857401493166, "res": {"Yes": 0.9995857401493166, "No": 0.00041417842447090587}, "ground_truth": 1}, {"key": "35559735", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985149261151884, "res": {"Yes": 0.9985149261151884, "No": 0.0014850586686491946}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991977226367158, "res": {"Yes": 0.9991977226367158, "No": 0.0008022277603131547}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7032476311852487, "res": {"Yes": 0.7032476311852487, "No": 0.29675206108201535}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989950853652388, "res": {"Yes": 0.9989950853652388, "No": 0.0010048409587439212}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985224139068429, "res": {"Yes": 0.9985224139068429, "No": 0.001477526312564936}, "ground_truth": 1}, {"key": "33005019", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999109064319083, "res": {"Yes": 0.999109064319083, "No": 0.0008909124461716829}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9846516521494925, "res": {"Yes": 0.9846516521494925, "No": 0.015348268693609316}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7566544520275205, "res": {"Yes": 0.7566544520275205, "No": 0.24334507043662565}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9938987593235317, "res": {"Yes": 0.9938987593235317, "No": 0.006101217119005847}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9602797372482625, "res": {"Yes": 0.9602797372482625, "No": 0.0397200849931521}, "ground_truth": 1}, {"key": "30808252", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9667788467951511, "res": {"Yes": 0.9667788467951511, "No": 0.03322109211870744}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9458977551577971, "res": {"Yes": 0.9458977551577971, "No": 0.05410199595513512}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.2120279867608923, "res": {"No": 0.7879719154903965, "Yes": 0.2120279867608923}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975754420003093, "res": {"Yes": 0.9975754420003093, "No": 0.002424536274231399}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998155492131384, "res": {"Yes": 0.9998155492131384, "No": 0.0001843849253338335}, "ground_truth": 1}, {"key": "15159017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938093867973011, "res": {"Yes": 0.9938093867973011, "No": 0.0061905954588868835}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9964069952448878, "res": {"Yes": 0.9964069952448878, "No": 0.003592946359967164}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9814605535869028, "res": {"Yes": 0.9814605535869028, "No": 0.018539342191045673}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983297853635005, "res": {"Yes": 0.9983297853635005, "No": 0.0016701893094934553}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9901830984747232, "res": {"Yes": 0.9901830984747232, "No": 0.009816823317891741}, "ground_truth": 1}, {"key": "24493400", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968859637004391, "res": {"Yes": 0.9968859637004391, "No": 0.003114029600486694}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9632042481332135, "res": {"Yes": 0.9632042481332135, "No": 0.0367956811775537}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.04797830257298316, "res": {"No": 0.9520215765541213, "Yes": 0.04797830257298316}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989204957843416, "res": {"Yes": 0.9989204957843416, "No": 0.0010794628744845737}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996065813694129, "res": {"Yes": 0.9996065813694129, "No": 0.0003933160778530937}, "ground_truth": 1}, {"key": "37791071", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994441417464723, "res": {"Yes": 0.9994441417464723, "No": 0.0005558175305729945}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.995833784854706, "res": {"Yes": 0.995833784854706, "No": 0.004166193457464898}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9992315242288684, "res": {"Yes": 0.9992315242288684, "No": 0.0007684310647884153}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.8442201982616293, "res": {"Yes": 0.8442201982616293, "No": 0.1557794895861605}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999142218341572, "res": {"Yes": 0.9999142218341572, "No": 8.566647492236489e-05}, "ground_truth": 1}, {"key": "33528627", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992455685566148, "res": {"Yes": 0.9992455685566148, "No": 0.0007543696938297623}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986400934121323, "res": {"Yes": 0.9986400934121323, "No": 0.0013598888642995892}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9430546053817341, "res": {"Yes": 0.9430546053817341, "No": 0.05694526703029891}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996466042090082, "res": {"Yes": 0.9996466042090082, "No": 0.00035334911338497793}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996369521478262, "res": {"Yes": 0.9996369521478262, "No": 0.00036293166213629267}, "ground_truth": 1}, {"key": "39925662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993855644928546, "res": {"Yes": 0.9993855644928546, "No": 0.0006144262822168924}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9836204075905058, "res": {"Yes": 0.9836204075905058, "No": 0.016379598704042944}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.13998729646858754, "res": {"No": 0.8600126332645314, "Yes": 0.13998729646858754}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9883543753898668, "res": {"Yes": 0.9883543753898668, "No": 0.0116455598543533}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9742203684062912, "res": {"Yes": 0.9742203684062912, "No": 0.02577951744084126}, "ground_truth": 1}, {"key": "29213416", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993975857243853, "res": {"Yes": 0.9993975857243853, "No": 0.0006023930612826646}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.976767660822278, "res": {"Yes": 0.976767660822278, "No": 0.02323222396545266}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.3683009043732209, "res": {"No": 0.6316989394255359, "Yes": 0.3683009043732209}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990256464409935, "res": {"Yes": 0.9990256464409935, "No": 0.0009742861587374989}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998269870484672, "res": {"Yes": 0.9998269870484672, "No": 0.00017298719215383377}, "ground_truth": 1}, {"key": "34492745", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986254733283209, "res": {"Yes": 0.9986254733283209, "No": 0.0013744611714168586}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9917107657454354, "res": {"Yes": 0.9917107657454354, "No": 0.008289206766484527}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.002322560588489453, "res": {"No": 0.9976773609100703, "Yes": 0.002322560588489453}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9943242914395749, "res": {"Yes": 0.9943242914395749, "No": 0.005675670913268687}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995611938499048, "res": {"Yes": 0.995611938499048, "No": 0.004388064664541908}, "ground_truth": 1}, {"key": "34191937", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966501562880499, "res": {"Yes": 0.9966501562880499, "No": 0.0033498121813015425}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981605147306538, "res": {"Yes": 0.9981605147306538, "No": 0.0018395058559093906}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.004019440761834383, "res": {"No": 0.9959803973168554, "Yes": 0.004019440761834383}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9829668909893707, "res": {"Yes": 0.9829668909893707, "No": 0.017032591895975346}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968976916343455, "res": {"Yes": 0.9968976916343455, "No": 0.0031020565010491646}, "ground_truth": 1}, {"key": "34933372", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948771318762701, "res": {"Yes": 0.9948771318762701, "No": 0.005122531646140622}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9928978587564551, "res": {"Yes": 0.9928978587564551, "No": 0.007101574918685594}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.008849662113116658, "res": {"No": 0.9911501911392492, "Yes": 0.008849662113116658}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.044048825052397725, "res": {"No": 0.9559510195725764, "Yes": 0.044048825052397725}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6503344369675836, "res": {"Yes": 0.6503344369675836, "No": 0.3496653936214485}, "ground_truth": 1}, {"key": "38714379", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.373539979491413, "res": {"No": 0.626459657609673, "Yes": 0.373539979491413}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.31252884814573817, "res": {"No": 0.6874707471133376, "Yes": 0.31252884814573817}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9064888762138765, "res": {"Yes": 0.9064888762138765, "No": 0.09351064489719123}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.995142907508535, "res": {"Yes": 0.995142907508535, "No": 0.004857118102395058}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9870845071510767, "res": {"Yes": 0.9870845071510767, "No": 0.012915463042489963}, "ground_truth": 1}, {"key": "39220660", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9909598005556375, "res": {"Yes": 0.9909598005556375, "No": 0.009040109426047868}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7859995099707561, "res": {"Yes": 0.7859995099707561, "No": 0.21400012068753513}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.06978425972614885, "res": {"No": 0.9302155272067202, "Yes": 0.06978425972614885}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9474073741094791, "res": {"Yes": 0.9474073741094791, "No": 0.05259251866803146}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8539633430020592, "res": {"Yes": 0.8539633430020592, "No": 0.14603629387234102}, "ground_truth": 1}, {"key": "41028780", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8827320134225282, "res": {"Yes": 0.8827320134225282, "No": 0.1172676910361409}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9671693167259047, "res": {"Yes": 0.9671693167259047, "No": 0.03283040536989795}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7494775014265829, "res": {"Yes": 0.7494775014265829, "No": 0.2505221347714157}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975438915821945, "res": {"Yes": 0.9975438915821945, "No": 0.002456053218897015}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991161991822088, "res": {"Yes": 0.9991161991822088, "No": 0.0008837697438614436}, "ground_truth": 1}, {"key": "39457108", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992716291908943, "res": {"Yes": 0.9992716291908943, "No": 0.0007283702892506757}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962752890091361, "res": {"Yes": 0.9962752890091361, "No": 0.0037246797649019983}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6176449877901635, "res": {"Yes": 0.6176449877901635, "No": 0.3823548786228534}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.995799974182944, "res": {"Yes": 0.995799974182944, "No": 0.004200022905572281}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994628391225894, "res": {"Yes": 0.9994628391225894, "No": 0.0005370689893067622}, "ground_truth": 1}, {"key": "38288018", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9941113637851203, "res": {"Yes": 0.9941113637851203, "No": 0.005888582337602558}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9950612350036335, "res": {"Yes": 0.9950612350036335, "No": 0.004938755018951301}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9675686795332065, "res": {"Yes": 0.9675686795332065, "No": 0.03243124399529703}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998835895922175, "res": {"Yes": 0.9998835895922175, "No": 0.00011639208770736278}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981783258661553, "res": {"Yes": 0.9981783258661553, "No": 0.0018216683900037609}, "ground_truth": 1}, {"key": "40106293", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997395325275436, "res": {"Yes": 0.9997395325275436, "No": 0.0002603986062465455}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9898339828565692, "res": {"Yes": 0.9898339828565692, "No": 0.010165933514144604}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9786475876636099, "res": {"Yes": 0.9786475876636099, "No": 0.021352381497028}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9828898451322081, "res": {"Yes": 0.9828898451322081, "No": 0.017110184636341397}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999333535156419, "res": {"Yes": 0.999333535156419, "No": 0.0006663440028360323}, "ground_truth": 1}, {"key": "39948797", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9918404513155977, "res": {"Yes": 0.9918404513155977, "No": 0.008159519471799244}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972416123616038, "res": {"Yes": 0.9972416123616038, "No": 0.0027583985587223084}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.564049480471387, "res": {"Yes": 0.564049480471387, "No": 0.43595012690868756}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9941369332546933, "res": {"Yes": 0.9941369332546933, "No": 0.0058630460892245}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999836640924547, "res": {"Yes": 0.999836640924547, "No": 0.00016324604773395522}, "ground_truth": 1}, {"key": "31853399", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996866322899207, "res": {"Yes": 0.9996866322899207, "No": 0.00031334084445210384}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990624195035159, "res": {"Yes": 0.9990624195035159, "No": 0.0009375504389690254}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.26478063189097595, "res": {"No": 0.7352191433431986, "Yes": 0.26478063189097595}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9891781194692552, "res": {"Yes": 0.9891781194692552, "No": 0.010821670797187207}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992672365023945, "res": {"Yes": 0.9992672365023945, "No": 0.0007327313725397863}, "ground_truth": 1}, {"key": "35273252", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976531584487703, "res": {"Yes": 0.9976531584487703, "No": 0.0023467649881467913}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9751059609420712, "res": {"Yes": 0.9751059609420712, "No": 0.02489371791283128}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9415243200922649, "res": {"Yes": 0.9415243200922649, "No": 0.05847548923382961}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.960180592523596, "res": {"Yes": 0.960180592523596, "No": 0.039819143096601346}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945947377857315, "res": {"Yes": 0.9945947377857315, "No": 0.00540520446361786}, "ground_truth": 1}, {"key": "37130459", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988279623732367, "res": {"Yes": 0.9988279623732367, "No": 0.0011719772491544374}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968131184218337, "res": {"Yes": 0.9968131184218337, "No": 0.0031868780790162373}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9772936345114096, "res": {"Yes": 0.9772936345114096, "No": 0.02270636039793694}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997421506688544, "res": {"Yes": 0.9997421506688544, "No": 0.0002577635154929119}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955536862518046, "res": {"Yes": 0.9955536862518046, "No": 0.004446362201207705}, "ground_truth": 1}, {"key": "21734003", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987037020210265, "res": {"Yes": 0.9987037020210265, "No": 0.0012962678863260228}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986134613341963, "res": {"Yes": 0.9986134613341963, "No": 0.0013865411698287763}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9041785066272391, "res": {"Yes": 0.9041785066272391, "No": 0.09582135562564194}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9871475779434777, "res": {"Yes": 0.9871475779434777, "No": 0.012852293513838589}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996000351372798, "res": {"Yes": 0.9996000351372798, "No": 0.0003998609485172632}, "ground_truth": 1}, {"key": "33990737", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999754187196919, "res": {"Yes": 0.999754187196919, "No": 0.00024569146906011006}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957119166966295, "res": {"Yes": 0.9957119166966295, "No": 0.004288040972951824}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.199463244045499, "res": {"No": 0.8005366129418352, "Yes": 0.199463244045499}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9831981100262592, "res": {"Yes": 0.9831981100262592, "No": 0.016801871499458755}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982285743297988, "res": {"Yes": 0.9982285743297988, "No": 0.0017714586070100746}, "ground_truth": 1}, {"key": "34559912", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8537856004926622, "res": {"Yes": 0.8537856004926622, "No": 0.14621433293216543}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9332230119084156, "res": {"Yes": 0.9332230119084156, "No": 0.06677689109385039}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9706734422675305, "res": {"Yes": 0.9706734422675305, "No": 0.02932645116718399}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983026930574024, "res": {"Yes": 0.9983026930574024, "No": 0.0016972279100658817}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9924829335343699, "res": {"Yes": 0.9924829335343699, "No": 0.007517068831881555}, "ground_truth": 1}, {"key": "39820439", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982586286495553, "res": {"Yes": 0.9982586286495553, "No": 0.0017413749093403367}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9900460171144155, "res": {"Yes": 0.9900460171144155, "No": 0.009953881533629604}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9920621514385813, "res": {"Yes": 0.9920621514385813, "No": 0.007937785078553139}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.6892309418656594, "res": {"Yes": 0.6892309418656594, "No": 0.310768567429435}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9772010843003063, "res": {"Yes": 0.9772010843003063, "No": 0.022798892927773977}, "ground_truth": 1}, {"key": "34759328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9627377503233545, "res": {"Yes": 0.9627377503233545, "No": 0.03726210203780338}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9287158996660043, "res": {"Yes": 0.9287158996660043, "No": 0.07128399149377232}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9919986813325626, "res": {"Yes": 0.9919986813325626, "No": 0.008001269999213868}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971678835299956, "res": {"Yes": 0.9971678835299956, "No": 0.002832071093015005}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963359955059891, "res": {"Yes": 0.9963359955059891, "No": 0.0036639554173430442}, "ground_truth": 1}, {"key": "36939137", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.927939362930979, "res": {"Yes": 0.927939362930979, "No": 0.07206066826319603}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.36952657118077553, "res": {"No": 0.6304734083261759, "Yes": 0.36952657118077553}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8161149286514544, "res": {"Yes": 0.8161149286514544, "No": 0.18388478242134138}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992709144121535, "res": {"Yes": 0.9992709144121535, "No": 0.0007290006021218924}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985237231702904, "res": {"Yes": 0.9985237231702904, "No": 0.0014762945693285816}, "ground_truth": 1}, {"key": "35851522", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997827784660145, "res": {"Yes": 0.9997827784660145, "No": 0.0002170922889794184}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983312134752799, "res": {"Yes": 0.9983312134752799, "No": 0.0016687457210229596}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9954640182807695, "res": {"Yes": 0.9954640182807695, "No": 0.00453598508818164}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992523580236433, "res": {"Yes": 0.9992523580236433, "No": 0.0007475880253253343}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998788220126454, "res": {"Yes": 0.9998788220126454, "No": 0.00012108715286827787}, "ground_truth": 1}, {"key": "22412782", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999359152835132, "res": {"Yes": 0.9999359152835132, "No": 6.399321473440616e-05}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999909692497968, "res": {"Yes": 0.999909692497968, "No": 9.025144046929884e-05}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7866621509223592, "res": {"Yes": 0.7866621509223592, "No": 0.21333738484839787}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959618307221397, "res": {"Yes": 0.9959618307221397, "No": 0.0040381161308813435}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9844252762368794, "res": {"Yes": 0.9844252762368794, "No": 0.015574670036352021}, "ground_truth": 1}, {"key": "38579227", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932285552562431, "res": {"Yes": 0.9932285552562431, "No": 0.006771362207260388}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.6947709889643198, "res": {"Yes": 0.6947709889643198, "No": 0.30522865339028704}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5216414523486858, "res": {"Yes": 0.5216414523486858, "No": 0.478358369695227}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9661403861994394, "res": {"Yes": 0.9661403861994394, "No": 0.03385949895281141}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.517404092251893, "res": {"Yes": 0.517404092251893, "No": 0.48259571732583856}, "ground_truth": 1}, {"key": "37206995", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9383382061827271, "res": {"Yes": 0.9383382061827271, "No": 0.061661760820419374}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.3671417006990834, "res": {"No": 0.6328581839069036, "Yes": 0.3671417006990834}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9895034285635916, "res": {"Yes": 0.9895034285635916, "No": 0.010496498013221585}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.957497232759423, "res": {"Yes": 0.957497232759423, "No": 0.042502278190758504}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9788880983269233, "res": {"Yes": 0.9788880983269233, "No": 0.021111862345587647}, "ground_truth": 1}, {"key": "38700847", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964190698776576, "res": {"Yes": 0.9964190698776576, "No": 0.003580949465162255}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9910803958091963, "res": {"Yes": 0.9910803958091963, "No": 0.008919494539242519}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8886217642845499, "res": {"Yes": 0.8886217642845499, "No": 0.11137776137589737}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965567346811386, "res": {"Yes": 0.9965567346811386, "No": 0.003443241344254827}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996390970411464, "res": {"Yes": 0.9996390970411464, "No": 0.00036085894863019163}, "ground_truth": 1}, {"key": "20246590", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984700010436315, "res": {"Yes": 0.9984700010436315, "No": 0.0015299289233250196}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9908958946278406, "res": {"Yes": 0.9908958946278406, "No": 0.009104070115619665}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.961447826534685, "res": {"Yes": 0.961447826534685, "No": 0.03855206552473524}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949881763096782, "res": {"Yes": 0.9949881763096782, "No": 0.005011820783849184}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985895715903987, "res": {"Yes": 0.9985895715903987, "No": 0.0014104281051524402}, "ground_truth": 1}, {"key": "39141360", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9922912192376145, "res": {"Yes": 0.9922912192376145, "No": 0.007708736368310064}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992946080059903, "res": {"Yes": 0.9992946080059903, "No": 0.0007053552368508487}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8972395955084294, "res": {"Yes": 0.8972395955084294, "No": 0.10276033980121091}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936894278283671, "res": {"Yes": 0.9936894278283671, "No": 0.0063105843483827065}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9832721024009073, "res": {"Yes": 0.9832721024009073, "No": 0.01672791664217159}, "ground_truth": 1}, {"key": "37906226", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975289453942829, "res": {"Yes": 0.9975289453942829, "No": 0.002471032992952126}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9874465601253114, "res": {"Yes": 0.9874465601253114, "No": 0.012553296231022798}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.07952584625761719, "res": {"No": 0.9204740176976376, "Yes": 0.07952584625761719}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996548227073943, "res": {"Yes": 0.9996548227073943, "No": 0.0003451047720275886}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998221005559831, "res": {"Yes": 0.9998221005559831, "No": 0.00017786966410649822}, "ground_truth": 1}, {"key": "16201033", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998507010861922, "res": {"Yes": 0.9998507010861922, "No": 0.000149200428233651}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988948088740633, "res": {"Yes": 0.9988948088740633, "No": 0.0011051385138144826}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.013819619411321517, "res": {"No": 0.9861803089306448, "Yes": 0.013819619411321517}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961589824518624, "res": {"Yes": 0.9961589824518624, "No": 0.0038410435173106745}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996476466664121, "res": {"Yes": 0.996476466664121, "No": 0.0035235111525899723}, "ground_truth": 1}, {"key": "36469022", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9729690316504725, "res": {"Yes": 0.9729690316504725, "No": 0.027030833042129467}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7418361813811333, "res": {"Yes": 0.7418361813811333, "No": 0.2581637255953957}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.02350096017866784, "res": {"No": 0.9764989802077989, "Yes": 0.02350096017866784}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9682833418631829, "res": {"Yes": 0.9682833418631829, "No": 0.03171655483098204}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9804080013987982, "res": {"Yes": 0.9804080013987982, "No": 0.01959203838727545}, "ground_truth": 1}, {"key": "31295270", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960196576459336, "res": {"Yes": 0.9960196576459336, "No": 0.0039802788473173095}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9795791091792408, "res": {"Yes": 0.9795791091792408, "No": 0.02042091210510551}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9802260800448054, "res": {"Yes": 0.9802260800448054, "No": 0.019773915674099096}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996226640291312, "res": {"Yes": 0.9996226640291312, "No": 0.00037725153175743984}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999850343529647, "res": {"Yes": 0.999850343529647, "No": 0.00014961997179962743}, "ground_truth": 1}, {"key": "35360689", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984486104508496, "res": {"Yes": 0.9984486104508496, "No": 0.0015513294593022578}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9197774227525264, "res": {"Yes": 0.9197774227525264, "No": 0.08022222362013698}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9199747269710671, "res": {"Yes": 0.9199747269710671, "No": 0.08002515950008392}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997726484082909, "res": {"Yes": 0.9997726484082909, "No": 0.000227286550399073}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999422326855956, "res": {"Yes": 0.9999422326855956, "No": 5.768519518243215e-05}, "ground_truth": 1}, {"key": "29202793", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999751565364996, "res": {"Yes": 0.999751565364996, "No": 0.00024831883810584485}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992787725096675, "res": {"Yes": 0.9992787725096675, "No": 0.0007211264894038978}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8410989149932611, "res": {"Yes": 0.8410989149932611, "No": 0.15890104960779783}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994674819081155, "res": {"Yes": 0.9994674819081155, "No": 0.0005324266587038367}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996313626234608, "res": {"Yes": 0.9996313626234608, "No": 0.00036857752077722417}, "ground_truth": 1}, {"key": "35999008", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983878888494447, "res": {"Yes": 0.9983878888494447, "No": 0.0016121122553634936}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.994715155686711, "res": {"Yes": 0.994715155686711, "No": 0.005284881932621598}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.02445264228461182, "res": {"No": 0.9755472035569881, "Yes": 0.02445264228461182}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.98858359244999, "res": {"Yes": 0.98858359244999, "No": 0.011416339046622195}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997519228962147, "res": {"Yes": 0.9997519228962147, "No": 0.000248015081784795}, "ground_truth": 1}, {"key": "31797119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9950238174244822, "res": {"Yes": 0.9950238174244822, "No": 0.004976145916100584}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983479640162353, "res": {"Yes": 0.9983479640162353, "No": 0.0016520532016719405}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.3944732848502567, "res": {"No": 0.605526500886248, "Yes": 0.3944732848502567}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.872813157090715, "res": {"Yes": 0.872813157090715, "No": 0.12718645749224308}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38231162143899106, "res": {"No": 0.6176881873912821, "Yes": 0.38231162143899106}, "ground_truth": 1}, {"key": "26711893", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932102243991849, "res": {"Yes": 0.9932102243991849, "No": 0.006789808778716961}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9399340121712774, "res": {"Yes": 0.9399340121712774, "No": 0.06006573699058684}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9655107722532433, "res": {"Yes": 0.9655107722532433, "No": 0.03448915679993885}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993428231953304, "res": {"Yes": 0.9993428231953304, "No": 0.0006571790828406025}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99864425245719, "res": {"Yes": 0.99864425245719, "No": 0.001355730304136657}, "ground_truth": 1}, {"key": "35348288", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980338077933475, "res": {"Yes": 0.9980338077933475, "No": 0.0019662030142487473}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9927536721600296, "res": {"Yes": 0.9927536721600296, "No": 0.007246224531724747}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9671650718290891, "res": {"Yes": 0.9671650718290891, "No": 0.03283490016005446}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961007947197931, "res": {"Yes": 0.9961007947197931, "No": 0.003899176117837059}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985183742927366, "res": {"Yes": 0.9985183742927366, "No": 0.0014815788044892184}, "ground_truth": 1}, {"key": "38124131", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996811101970616, "res": {"Yes": 0.996811101970616, "No": 0.00318890302702743}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948772505651191, "res": {"Yes": 0.9948772505651191, "No": 0.0051227467969878985}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7734121423275503, "res": {"Yes": 0.7734121423275503, "No": 0.2265876575221212}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9865194110774248, "res": {"Yes": 0.9865194110774248, "No": 0.013480509196369726}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976513782380627, "res": {"Yes": 0.9976513782380627, "No": 0.0023485691702083435}, "ground_truth": 1}, {"key": "20285901", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998309584165016, "res": {"Yes": 0.998309584165016, "No": 0.0016904363298965597}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8594467763878363, "res": {"Yes": 0.8594467763878363, "No": 0.14055314577324338}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.993007051255441, "res": {"Yes": 0.993007051255441, "No": 0.006992962449511849}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998558260940567, "res": {"Yes": 0.9998558260940567, "No": 0.00014414929855048887}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999818067994983, "res": {"Yes": 0.9999818067994983, "No": 1.8167759333023384e-05}, "ground_truth": 1}, {"key": "35633632", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7460983283683057, "res": {"Yes": 0.7460983283683057, "No": 0.25390139398376166}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997933816685082, "res": {"Yes": 0.9997933816685082, "No": 0.00020658553877032668}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9974564702673941, "res": {"Yes": 0.9974564702673941, "No": 0.002543487775209028}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972652030890398, "res": {"Yes": 0.9972652030890398, "No": 0.002734775310130944}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.986567914496596, "res": {"Yes": 0.986567914496596, "No": 0.013431943037787825}, "ground_truth": 1}, {"key": "10741274", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991991519501124, "res": {"Yes": 0.9991991519501124, "No": 0.0008008101942194672}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9941680373034977, "res": {"Yes": 0.9941680373034977, "No": 0.005831914178527815}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0862981688104729, "res": {"No": 0.9137014847432049, "Yes": 0.0862981688104729}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9638297575683937, "res": {"Yes": 0.9638297575683937, "No": 0.03617014587455993}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9519096444377682, "res": {"Yes": 0.9519096444377682, "No": 0.048090111746505314}, "ground_truth": 1}, {"key": "30605795", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975233713178627, "res": {"Yes": 0.9975233713178627, "No": 0.0024766164129364876}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8952349689714284, "res": {"Yes": 0.8952349689714284, "No": 0.10476457385257848}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9059816472887635, "res": {"Yes": 0.9059816472887635, "No": 0.09401828446901742}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994685542173628, "res": {"Yes": 0.9994685542173628, "No": 0.000531424723768059}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998022009648091, "res": {"Yes": 0.9998022009648091, "No": 0.00019767841783173678}, "ground_truth": 1}, {"key": "30539722", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998963429787603, "res": {"Yes": 0.9998963429787603, "No": 0.0001035763019460164}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994789157815629, "res": {"Yes": 0.9994789157815629, "No": 0.0005210601874221274}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.26539077939027794, "res": {"No": 0.73460903974686, "Yes": 0.26539077939027794}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999655955278475, "res": {"Yes": 0.9999655955278475, "No": 3.428769549761505e-05}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988092914566442, "res": {"Yes": 0.9988092914566442, "No": 0.0011906236898955591}, "ground_truth": 1}, {"key": "18639299", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996061047471125, "res": {"Yes": 0.9996061047471125, "No": 0.0003938728958807974}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977228045525107, "res": {"Yes": 0.9977228045525107, "No": 0.0022772213167124382}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9940525793441981, "res": {"Yes": 0.9940525793441981, "No": 0.0059473799556056485}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9948606130735119, "res": {"Yes": 0.9948606130735119, "No": 0.00513931745082986}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952895643641864, "res": {"Yes": 0.9952895643641864, "No": 0.004710367728546721}, "ground_truth": 1}, {"key": "39773552", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981341518629178, "res": {"Yes": 0.9981341518629178, "No": 0.001865810007128362}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975078311220632, "res": {"Yes": 0.9975078311220632, "No": 0.0024921003749393106}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.19586369408947968, "res": {"No": 0.8041362331023972, "Yes": 0.19586369408947968}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993180601267597, "res": {"Yes": 0.9993180601267597, "No": 0.0006819010490614523}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977111749637348, "res": {"Yes": 0.9977111749637348, "No": 0.0022888119389346405}, "ground_truth": 1}, {"key": "34086410", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991498752609651, "res": {"Yes": 0.9991498752609651, "No": 0.0008500582342026709}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987652981723129, "res": {"Yes": 0.9987652981723129, "No": 0.0012346546297949497}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8908830527981513, "res": {"Yes": 0.8908830527981513, "No": 0.10911657203671302}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918049153006813, "res": {"Yes": 0.9918049153006813, "No": 0.008195069385607824}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993087722780275, "res": {"Yes": 0.9993087722780275, "No": 0.0006911551014342353}, "ground_truth": 1}, {"key": "35454652", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991934347487611, "res": {"Yes": 0.9991934347487611, "No": 0.0008065384390278042}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998777566423283, "res": {"Yes": 0.9998777566423283, "No": 0.00012221044991869864}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0019214092073490253, "res": {"No": 0.9980785689166922, "Yes": 0.0019214092073490253}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999812927220295, "res": {"Yes": 0.999812927220295, "No": 0.000187010274269749}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960543107686476, "res": {"Yes": 0.9960543107686476, "No": 0.003945632531583419}, "ground_truth": 1}, {"key": "36158310", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994440225527711, "res": {"Yes": 0.9994440225527711, "No": 0.000555957503048988}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973991928744391, "res": {"Yes": 0.9973991928744391, "No": 0.0026007770843860384}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.20970978086136607, "res": {"No": 0.7902900097000302, "Yes": 0.20970978086136607}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9786625415129966, "res": {"Yes": 0.9786625415129966, "No": 0.021337482735015904}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990081736851878, "res": {"Yes": 0.9990081736851878, "No": 0.000991810993082108}, "ground_truth": 1}, {"key": "35688387", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9949611468280237, "res": {"Yes": 0.9949611468280237, "No": 0.005038846964284358}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980812907804689, "res": {"Yes": 0.9980812907804689, "No": 0.0019187111392374783}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.05803495950095976, "res": {"No": 0.9419649595759704, "Yes": 0.05803495950095976}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998456989556309, "res": {"Yes": 0.9998456989556309, "No": 0.00015424628187613875}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999547484278832, "res": {"Yes": 0.9999547484278832, "No": 4.5159901491823295e-05}, "ground_truth": 1}, {"key": "34209292", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992120046315452, "res": {"Yes": 0.9992120046315452, "No": 0.0007878937596302953}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998945551097033, "res": {"Yes": 0.9998945551097033, "No": 0.00010541364194649308}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.47056260887490337, "res": {"No": 0.5294372124045282, "Yes": 0.47056260887490337}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996289794451553, "res": {"Yes": 0.9996289794451553, "No": 0.00037090113995130356}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998891915297121, "res": {"Yes": 0.9998891915297121, "No": 0.00011067240794948012}, "ground_truth": 1}, {"key": "25037859", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999098116872104, "res": {"Yes": 0.9999098116872104, "No": 9.008841328591356e-05}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996067005625089, "res": {"Yes": 0.9996067005625089, "No": 0.0003932065152565823}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0224484210301517, "res": {"No": 0.9775511354598223, "Yes": 0.0224484210301517}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9770180648377575, "res": {"Yes": 0.9770180648377575, "No": 0.02298103427405828}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986696948414435, "res": {"Yes": 0.9986696948414435, "No": 0.0013300084279021746}, "ground_truth": 1}, {"key": "36412121", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9885338550949312, "res": {"Yes": 0.9885338550949312, "No": 0.011465520094833927}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9731704025892935, "res": {"Yes": 0.9731704025892935, "No": 0.026829096234453696}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7999360436080166, "res": {"Yes": 0.7999360436080166, "No": 0.20006395270951355}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9821585307973371, "res": {"Yes": 0.9821585307973371, "No": 0.017841519173331367}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.978986795779189, "res": {"Yes": 0.978986795779189, "No": 0.021013230832657753}, "ground_truth": 1}, {"key": "34909172", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998277519874592, "res": {"Yes": 0.998277519874592, "No": 0.001722479769488006}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9922833484148793, "res": {"Yes": 0.9922833484148793, "No": 0.007716618724076196}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.965186488482985, "res": {"Yes": 0.965186488482985, "No": 0.03481339682662111}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9760189429472147, "res": {"Yes": 0.9760189429472147, "No": 0.023980756086518205}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.940949723145522, "res": {"Yes": 0.940949723145522, "No": 0.05904977185124487}, "ground_truth": 1}, {"key": "39011806", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9855729428945585, "res": {"Yes": 0.9855729428945585, "No": 0.014426789771554652}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8771269607359108, "res": {"Yes": 0.8771269607359108, "No": 0.1228726040241171}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9087735097887426, "res": {"Yes": 0.9087735097887426, "No": 0.09122636808053267}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.996362961405717, "res": {"Yes": 0.996362961405717, "No": 0.0036369702239202758}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998611895050684, "res": {"Yes": 0.9998611895050684, "No": 0.00013879333913277897}, "ground_truth": 1}, {"key": "33096163", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999800413250086, "res": {"Yes": 0.999800413250086, "No": 0.00019950305668947063}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958667733910492, "res": {"Yes": 0.9958667733910492, "No": 0.004133230394110777}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7705344838996696, "res": {"Yes": 0.7705344838996696, "No": 0.2294649234868127}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944362676083535, "res": {"Yes": 0.9944362676083535, "No": 0.0055634419861636}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999170824878469, "res": {"Yes": 0.9999170824878469, "No": 8.286282734039839e-05}, "ground_truth": 1}, {"key": "38762205", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994499795167534, "res": {"Yes": 0.9994499795167534, "No": 0.0005499490062198817}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997895679539296, "res": {"Yes": 0.9997895679539296, "No": 0.00021028958520415416}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.09393934227042867, "res": {"No": 0.906060126817951, "Yes": 0.09393934227042867}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995241612771715, "res": {"Yes": 0.9995241612771715, "No": 0.00047578970384480347}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9306431850963058, "res": {"Yes": 0.9306431850963058, "No": 0.06935632030753436}, "ground_truth": 1}, {"key": "35519177", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9524731989142367, "res": {"Yes": 0.9524731989142367, "No": 0.04752587402166516}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8843914478923287, "res": {"Yes": 0.8843914478923287, "No": 0.11560801398696277}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.17517892479176317, "res": {"No": 0.8248209058761803, "Yes": 0.17517892479176317}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950021022616474, "res": {"Yes": 0.9950021022616474, "No": 0.004997857291991152}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976179154769775, "res": {"Yes": 0.9976179154769775, "No": 0.002382035987644638}, "ground_truth": 1}, {"key": "36192531", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994194952581134, "res": {"Yes": 0.9994194952581134, "No": 0.0005804389095524161}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996512478281817, "res": {"Yes": 0.9996512478281817, "No": 0.0003486669463000658}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9968362198335396, "res": {"Yes": 0.9968362198335396, "No": 0.0031637444511193706}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999653273603495, "res": {"Yes": 0.999653273603495, "No": 0.0003466532698178136}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992937741549456, "res": {"Yes": 0.9992937741549456, "No": 0.0007061297185681622}, "ground_truth": 1}, {"key": "33160852", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999247109143655, "res": {"Yes": 0.9999247109143655, "No": 7.517814456758358e-05}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997956460731088, "res": {"Yes": 0.9997956460731088, "No": 0.0002042642620956992}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8906018667837494, "res": {"Yes": 0.8906018667837494, "No": 0.10939800773411226}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.996958706630898, "res": {"Yes": 0.996958706630898, "No": 0.003041302808317598}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938759143261521, "res": {"Yes": 0.9938759143261521, "No": 0.006124041650608988}, "ground_truth": 1}, {"key": "36312304", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999837713579866, "res": {"Yes": 0.999837713579866, "No": 0.00016224354296465532}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.96638190510406, "res": {"Yes": 0.96638190510406, "No": 0.0336179946604849}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9966770378999237, "res": {"Yes": 0.9966770378999237, "No": 0.0033229461333095667}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972672203591328, "res": {"Yes": 0.9972672203591328, "No": 0.002732724997611649}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999576091905158, "res": {"Yes": 0.9999576091905158, "No": 4.2362924518057236e-05}, "ground_truth": 1}, {"key": "33773343", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981054063127179, "res": {"Yes": 0.9981054063127179, "No": 0.001894610504682095}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987446044690997, "res": {"Yes": 0.9987446044690997, "No": 0.0012553405357226509}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.03515674455148661, "res": {"No": 0.9648431207788831, "Yes": 0.03515674455148661}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991052569215153, "res": {"Yes": 0.9991052569215153, "No": 0.0008947201232914745}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981408038503252, "res": {"Yes": 0.9981408038503252, "No": 0.0018591652171227556}, "ground_truth": 1}, {"key": "34913320", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997695498177235, "res": {"Yes": 0.9997695498177235, "No": 0.00023040555942151233}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.974901518258686, "res": {"Yes": 0.974901518258686, "No": 0.025098349298675975}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9754720961425543, "res": {"Yes": 0.9754720961425543, "No": 0.024527738946271718}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995264250920127, "res": {"Yes": 0.9995264250920127, "No": 0.00047350930215898175}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972425602402069, "res": {"Yes": 0.9972425602402069, "No": 0.0027574222584478654}, "ground_truth": 1}, {"key": "33784155", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997129610535632, "res": {"Yes": 0.9997129610535632, "No": 0.0002870041162422531}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998925135980606, "res": {"Yes": 0.998925135980606, "No": 0.0010748043865073651}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9833472538096393, "res": {"Yes": 0.9833472538096393, "No": 0.016652731865644442}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9671999706759996, "res": {"Yes": 0.9671999706759996, "No": 0.032799900405970084}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963872293168133, "res": {"Yes": 0.9963872293168133, "No": 0.003612763091855994}, "ground_truth": 1}, {"key": "24085062", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981333190201289, "res": {"Yes": 0.9981333190201289, "No": 0.0018666135979181128}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9799020316683291, "res": {"Yes": 0.9799020316683291, "No": 0.020097979326310862}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0656027082228068, "res": {"No": 0.93439702946132, "Yes": 0.0656027082228068}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9731404897371917, "res": {"Yes": 0.9731404897371917, "No": 0.026859403389272063}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9812221131589409, "res": {"Yes": 0.9812221131589409, "No": 0.018777874322004055}, "ground_truth": 1}, {"key": "33893487", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982285743297988, "res": {"Yes": 0.9982285743297988, "No": 0.0017714586070100746}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.45914142541828024, "res": {"No": 0.5408584172631211, "Yes": 0.45914142541828024}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0041938529882077575, "res": {"No": 0.995806006060396, "Yes": 0.0041938529882077575}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.0693831663978768, "res": {"No": 0.9306163410807914, "Yes": 0.0693831663978768}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8714992278705685, "res": {"Yes": 0.8714992278705685, "No": 0.12850030514248417}, "ground_truth": 1}, {"key": "40913011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9854922524131707, "res": {"Yes": 0.9854922524131707, "No": 0.014507279805378222}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9454856974196459, "res": {"Yes": 0.9454856974196459, "No": 0.05451380536177382}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9927145531745049, "res": {"Yes": 0.9927145531745049, "No": 0.007285461441668795}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9618872682061496, "res": {"Yes": 0.9618872682061496, "No": 0.03811263212360417}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9533347174183237, "res": {"Yes": 0.9533347174183237, "No": 0.04666518479210588}, "ground_truth": 1}, {"key": "29642545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.986468257426155, "res": {"Yes": 0.986468257426155, "No": 0.013531635020679776}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993077002501308, "res": {"Yes": 0.9993077002501308, "No": 0.0006922230691697777}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9324296664754108, "res": {"Yes": 0.9324296664754108, "No": 0.06757023523982737}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997290453948312, "res": {"Yes": 0.9997290453948312, "No": 0.000270877464461148}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996693569744574, "res": {"Yes": 0.9996693569744574, "No": 0.0003305571479515325}, "ground_truth": 1}, {"key": "35969159", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994862988896195, "res": {"Yes": 0.9994862988896195, "No": 0.0005136944112266549}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7913906061621379, "res": {"Yes": 0.7913906061621379, "No": 0.20860935308594383}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.19301719515085683, "res": {"No": 0.8069826179819347, "Yes": 0.19301719515085683}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.998675636144865, "res": {"Yes": 0.998675636144865, "No": 0.001324366562356557}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993985351025728, "res": {"Yes": 0.9993985351025728, "No": 0.0006014224250663648}, "ground_truth": 1}, {"key": "37081669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985247944869677, "res": {"Yes": 0.9985247944869677, "No": 0.0014751194638933493}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9931865832975793, "res": {"Yes": 0.9931865832975793, "No": 0.006813335393104758}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.012128871429101719, "res": {"No": 0.9878709975900646, "Yes": 0.012128871429101719}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999643744344122, "res": {"Yes": 0.999643744344122, "No": 0.00035619971044824106}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997622800898566, "res": {"Yes": 0.9997622800898566, "No": 0.00023763254641394438}, "ground_truth": 1}, {"key": "40048022", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991646331133563, "res": {"Yes": 0.9991646331133563, "No": 0.0008352938323366463}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.943085992647423, "res": {"Yes": 0.943085992647423, "No": 0.056913938670640894}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.1004079692374025, "res": {"No": 0.8995916993364156, "Yes": 0.1004079692374025}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955768440954368, "res": {"Yes": 0.9955768440954368, "No": 0.004423033969194817}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945837699527346, "res": {"Yes": 0.9945837699527346, "No": 0.005415915204367443}, "ground_truth": 1}, {"key": "32884004", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978421941175479, "res": {"Yes": 0.9978421941175479, "No": 0.002157637296895334}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987505462181749, "res": {"Yes": 0.9987505462181749, "No": 0.0012493285899660774}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9706541114984145, "res": {"Yes": 0.9706541114984145, "No": 0.029345783667862484}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981400898804635, "res": {"Yes": 0.9981400898804635, "No": 0.0018598880151093399}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960435499540286, "res": {"Yes": 0.9960435499540286, "No": 0.0039564544329157006}, "ground_truth": 1}, {"key": "39022490", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9156762093868951, "res": {"Yes": 0.9156762093868951, "No": 0.08432349886261438}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8805135255047767, "res": {"Yes": 0.8805135255047767, "No": 0.11948636673720385}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9329238872245154, "res": {"Yes": 0.9329238872245154, "No": 0.06707557568509083}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9176914279485697, "res": {"Yes": 0.9176914279485697, "No": 0.08230805520209712}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9940365597195338, "res": {"Yes": 0.9940365597195338, "No": 0.005963367146302158}, "ground_truth": 1}, {"key": "35159385", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981200113965526, "res": {"Yes": 0.9981200113965526, "No": 0.0018799155939185652}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9610330109283423, "res": {"Yes": 0.9610330109283423, "No": 0.03896662766776384}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.011029552695325604, "res": {"No": 0.9889703031093373, "Yes": 0.011029552695325604}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9907166022928334, "res": {"Yes": 0.9907166022928334, "No": 0.00928336676524792}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9810904812303067, "res": {"Yes": 0.9810904812303067, "No": 0.01890947802145989}, "ground_truth": 1}, {"key": "34363669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9925233269230099, "res": {"Yes": 0.9925233269230099, "No": 0.007476618959092094}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9157767653123698, "res": {"Yes": 0.9157767653123698, "No": 0.0842228177323716}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.009963283430652647, "res": {"No": 0.99003620085682, "Yes": 0.009963283430652647}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9749814049620056, "res": {"Yes": 0.9749814049620056, "No": 0.025018383108310394}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.987420523971393, "res": {"Yes": 0.987420523971393, "No": 0.012579383943674792}, "ground_truth": 1}, {"key": "36119687", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946133682115497, "res": {"Yes": 0.9946133682115497, "No": 0.005386491681257562}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9663771273237318, "res": {"Yes": 0.9663771273237318, "No": 0.03362244597764864}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8500398485071404, "res": {"Yes": 0.8500398485071404, "No": 0.14995969814798307}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990332643602803, "res": {"Yes": 0.9990332643602803, "No": 0.000966722571252375}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995334511277878, "res": {"Yes": 0.9995334511277878, "No": 0.00046640089685447746}, "ground_truth": 1}, {"key": "35217446", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963026170158576, "res": {"Yes": 0.9963026170158576, "No": 0.003697319431004257}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980785689166922, "res": {"Yes": 0.9980785689166922, "No": 0.0019213800021510338}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9889545588274365, "res": {"Yes": 0.9889545588274365, "No": 0.01104533292462307}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9824479350091152, "res": {"Yes": 0.9824479350091152, "No": 0.017552001324285506}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957211345423653, "res": {"Yes": 0.9957211345423653, "No": 0.004278847848086727}, "ground_truth": 1}, {"key": "39049331", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9951597981129306, "res": {"Yes": 0.9951597981129306, "No": 0.004840147904207246}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.40208844585423376, "res": {"No": 0.597911374016851, "Yes": 0.40208844585423376}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9872982666628225, "res": {"Yes": 0.9872982666628225, "No": 0.0127015912257501}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9939957986944884, "res": {"Yes": 0.9939957986944884, "No": 0.006004140286365275}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.933928589793597, "res": {"Yes": 0.933928589793597, "No": 0.06607124166123564}, "ground_truth": 1}, {"key": "36472242", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999086331589397, "res": {"Yes": 0.999086331589397, "No": 0.0009136067780806697}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8975234686930191, "res": {"Yes": 0.8975234686930191, "No": 0.10247650768392909}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9666158654880811, "res": {"Yes": 0.9666158654880811, "No": 0.03338405794442158}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997945734529232, "res": {"Yes": 0.9997945734529232, "No": 0.00020534932595240273}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990011549781176, "res": {"Yes": 0.9990011549781176, "No": 0.0009988242706940678}, "ground_truth": 1}, {"key": "31854721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999661915245194, "res": {"Yes": 0.9999661915245194, "No": 3.379198208620585e-05}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999889589149532, "res": {"Yes": 0.9999889589149532, "No": 1.0978464245685842e-05}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0421037192064408, "res": {"No": 0.9578960865969074, "Yes": 0.0421037192064408}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992598549425822, "res": {"Yes": 0.9992598549425822, "No": 0.0007401083230344072}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.994957962957448, "res": {"Yes": 0.994957962957448, "No": 0.0050419841188161455}, "ground_truth": 1}, {"key": "18725849", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973090717556337, "res": {"Yes": 0.9973090717556337, "No": 0.002690861167457165}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952221432445358, "res": {"Yes": 0.9952221432445358, "No": 0.0047778004274832584}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9700460959654099, "res": {"Yes": 0.9700460959654099, "No": 0.029953784553991857}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990434987092683, "res": {"Yes": 0.9990434987092683, "No": 0.0009564031775296104}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995857401493166, "res": {"Yes": 0.9995857401493166, "No": 0.0004142478880170454}, "ground_truth": 1}, {"key": "36883179", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996264770770631, "res": {"Yes": 0.9996264770770631, "No": 0.00037347563118668843}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999654763299556, "res": {"Yes": 0.9999654763299556, "No": 3.438937182149353e-05}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9978756717858158, "res": {"Yes": 0.9978756717858158, "No": 0.0021243466132582376}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934086419310376, "res": {"Yes": 0.9934086419310376, "No": 0.0065912939447985205}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897253713633699, "res": {"Yes": 0.9897253713633699, "No": 0.010274558375604228}, "ground_truth": 1}, {"key": "34266359", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996651862626335, "res": {"Yes": 0.9996651862626335, "No": 0.00033472542134567874}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997286878917885, "res": {"Yes": 0.9997286878917885, "No": 0.00027125451719359287}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9940596453932616, "res": {"Yes": 0.9940596453932616, "No": 0.005940370938490666}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946816534486896, "res": {"Yes": 0.9946816534486896, "No": 0.005318296147251414}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9930481973550481, "res": {"Yes": 0.9930481973550481, "No": 0.006951759402527849}, "ground_truth": 1}, {"key": "31920289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975414053068975, "res": {"Yes": 0.9975414053068975, "No": 0.0024585407404170956}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.980747063533441, "res": {"Yes": 0.980747063533441, "No": 0.019252978556693216}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.23180028930430993, "res": {"No": 0.7681996643507029, "Yes": 0.23180028930430993}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.6371117545140457, "res": {"Yes": 0.6371117545140457, "No": 0.3628879754217532}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9732491041155847, "res": {"Yes": 0.9732491041155847, "No": 0.026750840114065342}, "ground_truth": 1}, {"key": "36292997", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9909924641199325, "res": {"Yes": 0.9909924641199325, "No": 0.009007430780709263}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.2532025797950501, "res": {"No": 0.746797287115448, "Yes": 0.2532025797950501}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9937111993053863, "res": {"Yes": 0.9937111993053863, "No": 0.006288734002438662}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998277021549788, "res": {"Yes": 0.9998277021549788, "No": 0.00017223581546421462}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999897415702129, "res": {"Yes": 0.999897415702129, "No": 0.00010250797064693792}, "ground_truth": 1}, {"key": "30412533", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999928763541437, "res": {"Yes": 0.999928763541437, "No": 7.112262912446823e-05}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998405740096741, "res": {"Yes": 0.9998405740096741, "No": 0.00015936171538066807}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.11618534050115518, "res": {"No": 0.8838143020196421, "Yes": 0.11618534050115518}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9639357499714224, "res": {"Yes": 0.9639357499714224, "No": 0.036064041153547235}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8581579976515694, "res": {"Yes": 0.8581579976515694, "No": 0.1418418463739609}, "ground_truth": 1}, {"key": "40433191", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9375915899151617, "res": {"Yes": 0.9375915899151617, "No": 0.062407975454696604}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9921298451248058, "res": {"Yes": 0.9921298451248058, "No": 0.007870091422801742}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9909580425945106, "res": {"Yes": 0.9909580425945106, "No": 0.009041922149707267}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9777065491165536, "res": {"Yes": 0.9777065491165536, "No": 0.022293413564370965}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998393114325491, "res": {"Yes": 0.998393114325491, "No": 0.001606840420332288}, "ground_truth": 1}, {"key": "34565591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956673567974615, "res": {"Yes": 0.9956673567974615, "No": 0.004332686892373477}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.746061576469857, "res": {"Yes": 0.746061576469857, "No": 0.2539382885366318}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.00447040442078804, "res": {"No": 0.9955294607431564, "Yes": 0.00447040442078804}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974497112260153, "res": {"Yes": 0.9974497112260153, "No": 0.0025502624590259806}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9915986928946658, "res": {"Yes": 0.9915986928946658, "No": 0.008401083444182195}, "ground_truth": 1}, {"key": "36062480", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9875136248357664, "res": {"Yes": 0.9875136248357664, "No": 0.012486184323970032}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7165593227186977, "res": {"Yes": 0.7165593227186977, "No": 0.2834402319744544}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9668324465001875, "res": {"Yes": 0.9668324465001875, "No": 0.033167473405420346}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969564631772447, "res": {"Yes": 0.9969564631772447, "No": 0.003043571857975484}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990794351300606, "res": {"Yes": 0.9990794351300606, "No": 0.0009205624647558163}, "ground_truth": 1}, {"key": "37276883", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963861602935286, "res": {"Yes": 0.9963861602935286, "No": 0.003613820702386731}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8686905639853654, "res": {"Yes": 0.8686905639853654, "No": 0.13130934858731053}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.003117228416508725, "res": {"No": 0.9968825284373272, "Yes": 0.003117228416508725}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.7892306332612463, "res": {"Yes": 0.7892306332612463, "No": 0.21076869058135905}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9815666669862347, "res": {"Yes": 0.9815666669862347, "No": 0.018433096634667797}, "ground_truth": 1}, {"key": "38509260", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9440519014485123, "res": {"Yes": 0.9440519014485123, "No": 0.05594771416603924}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7107214202161906, "res": {"Yes": 0.7107214202161906, "No": 0.2892782648396379}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9745702250338201, "res": {"Yes": 0.9745702250338201, "No": 0.025429280329691946}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9833863347727729, "res": {"Yes": 0.9833863347727729, "No": 0.016613670473969177}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990655122059287, "res": {"Yes": 0.9990655122059287, "No": 0.0009344004166399996}, "ground_truth": 1}, {"key": "37139607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932837242779536, "res": {"Yes": 0.9932837242779536, "No": 0.00671622122711768}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.07070657583562001, "res": {"No": 0.9292932574128919, "Yes": 0.07070657583562001}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9053019178807536, "res": {"Yes": 0.9053019178807536, "No": 0.09469796196335348}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981222720411265, "res": {"Yes": 0.9981222720411265, "No": 0.0018777117457365528}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9819366400924925, "res": {"Yes": 0.9819366400924925, "No": 0.01806331848658404}, "ground_truth": 1}, {"key": "37092824", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967979565112064, "res": {"Yes": 0.9967979565112064, "No": 0.0032020005264332234}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9916832104529258, "res": {"Yes": 0.9916832104529258, "No": 0.008316719439849218}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9671179072400783, "res": {"Yes": 0.9671179072400783, "No": 0.03288205206518471}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.976044495457622, "res": {"Yes": 0.976044495457622, "No": 0.023955419423311874}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9246352902774051, "res": {"Yes": 0.9246352902774051, "No": 0.07536466106322816}, "ground_truth": 1}, {"key": "32191802", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9866997356818685, "res": {"Yes": 0.9866997356818685, "No": 0.013300141562687282}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.986949329262238, "res": {"Yes": 0.986949329262238, "No": 0.013050628310160267}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5145296997049631, "res": {"Yes": 0.5145296997049631, "No": 0.4854698956856178}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9932434901490218, "res": {"Yes": 0.9932434901490218, "No": 0.006756420777030025}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987487677446607, "res": {"Yes": 0.9987487677446607, "No": 0.0012512146243278078}, "ground_truth": 1}, {"key": "39396038", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9945994546624602, "res": {"Yes": 0.9945994546624602, "No": 0.005400344668223486}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962569397798416, "res": {"Yes": 0.9962569397798416, "No": 0.0037429582660066666}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0003869223620183598, "res": {"No": 0.9996130158676828, "Yes": 0.0003869223620183598}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989244216494966, "res": {"Yes": 0.9989244216494966, "No": 0.0010755210899621329}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997212684497316, "res": {"Yes": 0.997212684497316, "No": 0.002787268207258893}, "ground_truth": 1}, {"key": "39076884", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988233225284797, "res": {"Yes": 0.9988233225284797, "No": 0.0011765898289651697}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8241993077361004, "res": {"Yes": 0.8241993077361004, "No": 0.1758004032383829}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8791754406759072, "res": {"Yes": 0.8791754406759072, "No": 0.12082443952445071}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990771760141961, "res": {"Yes": 0.9990771760141961, "No": 0.0009228138713244172}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971909933629165, "res": {"Yes": 0.9971909933629165, "No": 0.002808993625993058}, "ground_truth": 1}, {"key": "27763432", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9522278734174769, "res": {"Yes": 0.9522278734174769, "No": 0.047772075727085826}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.994578697886816, "res": {"Yes": 0.994578697886816, "No": 0.005421346839649093}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.989916915611533, "res": {"Yes": 0.989916915611533, "No": 0.010083011510467555}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9715240965732844, "res": {"Yes": 0.9715240965732844, "No": 0.02847535507546601}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9881890385219505, "res": {"Yes": 0.9881890385219505, "No": 0.011810692662319251}, "ground_truth": 1}, {"key": "37806929", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965574475184259, "res": {"Yes": 0.9965574475184259, "No": 0.003442522470515176}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974271825825353, "res": {"Yes": 0.9974271825825353, "No": 0.002572785795557995}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7295648488807758, "res": {"Yes": 0.7295648488807758, "No": 0.2704350284821789}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959801632614895, "res": {"Yes": 0.9959801632614895, "No": 0.004019808557490485}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998925288654362, "res": {"Yes": 0.9998925288654362, "No": 0.00010737244210669444}, "ground_truth": 1}, {"key": "32334186", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999286443459358, "res": {"Yes": 0.9999286443459358, "No": 7.122882328826848e-05}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9933272398984809, "res": {"Yes": 0.9933272398984809, "No": 0.006672718038747005}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.33464118859259057, "res": {"No": 0.6653586036914108, "Yes": 0.33464118859259057}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9579898270628161, "res": {"Yes": 0.9579898270628161, "No": 0.04200964685705786}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9896669891857125, "res": {"Yes": 0.9896669891857125, "No": 0.010332748102612338}, "ground_truth": 1}, {"key": "36187324", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9600823499512817, "res": {"Yes": 0.9600823499512817, "No": 0.039917570045311254}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.6769484389828664, "res": {"Yes": 0.6769484389828664, "No": 0.32305121757062527}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7378963215170252, "res": {"Yes": 0.7378963215170252, "No": 0.26210326805426676}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9489035735664294, "res": {"Yes": 0.9489035735664294, "No": 0.05109603386249791}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9587954959727042, "res": {"Yes": 0.9587954959727042, "No": 0.04120439308641529}, "ground_truth": 1}, {"key": "35306009", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9385663476839351, "res": {"Yes": 0.9385663476839351, "No": 0.061433020206862886}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.948532007299838, "res": {"Yes": 0.948532007299838, "No": 0.051467589475338894}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9986982295392145, "res": {"Yes": 0.9986982295392145, "No": 0.001301701531440354}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996928289566881, "res": {"Yes": 0.9996928289566881, "No": 0.00030707839029226413}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992460449670382, "res": {"Yes": 0.9992460449670382, "No": 0.0007539182632576858}, "ground_truth": 1}, {"key": "39490050", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986016950406849, "res": {"Yes": 0.9986016950406849, "No": 0.0013982896049610357}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9901633464992817, "res": {"Yes": 0.9901633464992817, "No": 0.009836606453618026}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9915167634543066, "res": {"Yes": 0.9915167634543066, "No": 0.008483179147213549}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984520582995443, "res": {"Yes": 0.9984520582995443, "No": 0.0015479176544952566}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998223389235985, "res": {"Yes": 0.9998223389235985, "No": 0.00017754579186189242}, "ground_truth": 1}, {"key": "38072149", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988467526014888, "res": {"Yes": 0.9988467526014888, "No": 0.0011531822318619556}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971004339852668, "res": {"Yes": 0.9971004339852668, "No": 0.0028994999148932356}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9947277742258546, "res": {"Yes": 0.9947277742258546, "No": 0.005272263510539648}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9537192710357751, "res": {"Yes": 0.9537192710357751, "No": 0.04628061444774919}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985139812209609, "res": {"Yes": 0.9985139812209609, "No": 0.0014860094104020387}, "ground_truth": 1}, {"key": "35899689", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9827377360249576, "res": {"Yes": 0.9827377360249576, "No": 0.017262305974849512}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7682289179509063, "res": {"Yes": 0.7682289179509063, "No": 0.231771084307811}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.03737036007979968, "res": {"No": 0.9626294772974233, "Yes": 0.03737036007979968}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970309938421822, "res": {"Yes": 0.9970309938421822, "No": 0.002968958681309841}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979774089994237, "res": {"Yes": 0.9979774089994237, "No": 0.0020225835492421237}, "ground_truth": 1}, {"key": "27994518", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967022565400522, "res": {"Yes": 0.9967022565400522, "No": 0.00329772316049775}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991699891497277, "res": {"Yes": 0.9991699891497277, "No": 0.0008300098415124596}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5552080892060828, "res": {"Yes": 0.5552080892060828, "No": 0.44479169835062704}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997190387070186, "res": {"Yes": 0.9997190387070186, "No": 0.00028089236632433754}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967683392928915, "res": {"Yes": 0.9967683392928915, "No": 0.0032316384832811787}, "ground_truth": 1}, {"key": "10615479", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985264572318399, "res": {"Yes": 0.9985264572318399, "No": 0.001473468107596588}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989916356413664, "res": {"Yes": 0.9989916356413664, "No": 0.0010083086054387168}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9515052982007559, "res": {"Yes": 0.9515052982007559, "No": 0.04849459163195033}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9872241361810262, "res": {"Yes": 0.9872241361810262, "No": 0.012775759405520422}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997832109276807, "res": {"Yes": 0.997832109276807, "No": 0.0021679037329748346}, "ground_truth": 1}, {"key": "40186667", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9891427757773049, "res": {"Yes": 0.9891427757773049, "No": 0.010857176381349618}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9433917491841862, "res": {"Yes": 0.9433917491841862, "No": 0.05660820724253694}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.45637364867851726, "res": {"No": 0.5436260209636826, "Yes": 0.45637364867851726}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9771580395406106, "res": {"Yes": 0.9771580395406106, "No": 0.022841854550686803}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9949390778395829, "res": {"Yes": 0.9949390778395829, "No": 0.005060928573346776}, "ground_truth": 1}, {"key": "38622886", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964224999565676, "res": {"Yes": 0.9964224999565676, "No": 0.0035775481052976216}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973497433475327, "res": {"Yes": 0.9973497433475327, "No": 0.002650253455427534}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.02167389582376479, "res": {"No": 0.9783260627666582, "Yes": 0.02167389582376479}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.052813443971918, "res": {"No": 0.9471864654160884, "Yes": 0.052813443971918}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999542716355735, "res": {"Yes": 0.9999542716355735, "No": 4.5656434863137814e-05}, "ground_truth": 1}, {"key": "40686943", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999169632877386, "res": {"Yes": 0.9999169632877386, "No": 8.298953852326465e-05}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982182330348037, "res": {"Yes": 0.9982182330348037, "No": 0.001781775392937697}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8993159681680228, "res": {"Yes": 0.8993159681680228, "No": 0.10068398788440538}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.780945870096171, "res": {"Yes": 0.780945870096171, "No": 0.21905415426179647}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9884184089906489, "res": {"Yes": 0.9884184089906489, "No": 0.011581498628140067}, "ground_truth": 1}, {"key": "30604567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9890686066979765, "res": {"Yes": 0.9890686066979765, "No": 0.010931330008268958}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9431845936696918, "res": {"Yes": 0.9431845936696918, "No": 0.05681532405164927}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9942383737005465, "res": {"Yes": 0.9942383737005465, "No": 0.005761540454573069}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985742344708086, "res": {"Yes": 0.9985742344708086, "No": 0.001425666293720121}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995706150121312, "res": {"Yes": 0.9995706150121312, "No": 0.00042933717157678784}, "ground_truth": 1}, {"key": "35440903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995014228309241, "res": {"Yes": 0.9995014228309241, "No": 0.0004985034738404507}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988075053874662, "res": {"Yes": 0.9988075053874662, "No": 0.0011924994457282489}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.00562364077551345, "res": {"No": 0.9943762711999541, "Yes": 0.00562364077551345}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9917019683182535, "res": {"Yes": 0.9917019683182535, "No": 0.008298022878210524}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9756505721614557, "res": {"Yes": 0.9756505721614557, "No": 0.024349299447271343}, "ground_truth": 1}, {"key": "37219533", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9907555629892782, "res": {"Yes": 0.9907555629892782, "No": 0.009244365945496318}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9934900551051031, "res": {"Yes": 0.9934900551051031, "No": 0.006509969551249607}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9090920089136879, "res": {"Yes": 0.9090920089136879, "No": 0.0909074157243773}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9008242422925153, "res": {"Yes": 0.9008242422925153, "No": 0.09917532852583853}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970187854718136, "res": {"Yes": 0.9970187854718136, "No": 0.002981066966789348}, "ground_truth": 1}, {"key": "40178965", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964974222863191, "res": {"Yes": 0.9964974222863191, "No": 0.0035025044823381417}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8854765095543963, "res": {"Yes": 0.8854765095543963, "No": 0.11452313774808953}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.96451885098598, "res": {"Yes": 0.96451885098598, "No": 0.035481022603002524}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995263059284794, "res": {"Yes": 0.9995263059284794, "No": 0.00047363745111391733}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983433336891159, "res": {"Yes": 0.9983433336891159, "No": 0.0016566016065716412}, "ground_truth": 1}, {"key": "13750468", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989734342794782, "res": {"Yes": 0.9989734342794782, "No": 0.0010265195257608804}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979222063080826, "res": {"Yes": 0.9979222063080826, "No": 0.0020778231412686573}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.11232250911132978, "res": {"No": 0.8876773292374069, "Yes": 0.11232250911132978}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992315242288684, "res": {"Yes": 0.9992315242288684, "No": 0.0007684807838867022}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995413149281765, "res": {"Yes": 0.9995413149281765, "No": 0.00045859609620267503}, "ground_truth": 1}, {"key": "17754949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987229625072527, "res": {"Yes": 0.9987229625072527, "No": 0.0012769553082488925}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977859432474735, "res": {"Yes": 0.9977859432474735, "No": 0.0022140873322786477}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.025575185857894558, "res": {"No": 0.9744244139236704, "Yes": 0.025575185857894558}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.8867024407303882, "res": {"Yes": 0.8867024407303882, "No": 0.11329718010552077}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9403273120675163, "res": {"Yes": 0.9403273120675163, "No": 0.05967239266270043}, "ground_truth": 1}, {"key": "36675623", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.618248375600097, "res": {"Yes": 0.618248375600097, "No": 0.3817514639695432}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8199291868113501, "res": {"Yes": 0.8199291868113501, "No": 0.18007056088303972}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5867444342631744, "res": {"Yes": 0.5867444342631744, "No": 0.4132548401284691}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.3486511593730433, "res": {"No": 0.6513487082690795, "Yes": 0.3486511593730433}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5568024332001176, "res": {"Yes": 0.5568024332001176, "No": 0.44319740442854516}, "ground_truth": 1}, {"key": "40035440", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9389744968940045, "res": {"Yes": 0.9389744968940045, "No": 0.06102519300426269}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9658455999159936, "res": {"Yes": 0.9658455999159936, "No": 0.03415437123950427}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9682721640648028, "res": {"Yes": 0.9682721640648028, "No": 0.031727806688379524}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.4908523054510985, "res": {"No": 0.5091474595334041, "Yes": 0.4908523054510985}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997730059370517, "res": {"Yes": 0.9997730059370517, "No": 0.0002269710685334462}, "ground_truth": 1}, {"key": "37685909", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998209087387548, "res": {"Yes": 0.9998209087387548, "No": 0.00017900693676451627}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967554319242065, "res": {"Yes": 0.9967554319242065, "No": 0.0032445542611571777}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.974786301457114, "res": {"Yes": 0.974786301457114, "No": 0.02521331124293289}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999527220576372, "res": {"Yes": 0.9999527220576372, "No": 4.7211266349984846e-05}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998601168145652, "res": {"Yes": 0.9998601168145652, "No": 0.0001397767571297771}, "ground_truth": 1}, {"key": "36938787", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995406000165021, "res": {"Yes": 0.9995406000165021, "No": 0.00045937485667372374}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999910407653634, "res": {"Yes": 0.999910407653634, "No": 8.952872808481408e-05}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9746605806531375, "res": {"Yes": 0.9746605806531375, "No": 0.025339351407919604}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9903492116607817, "res": {"Yes": 0.9903492116607817, "No": 0.00965073693717873}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998433152563369, "res": {"Yes": 0.9998433152563369, "No": 0.00015658117380394358}, "ground_truth": 1}, {"key": "39398068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998611445035792, "res": {"Yes": 0.998611445035792, "No": 0.0013885497803077247}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9792329616253759, "res": {"Yes": 0.9792329616253759, "No": 0.02076706833644487}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6843940026045069, "res": {"Yes": 0.6843940026045069, "No": 0.3156056776983586}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999716893632173, "res": {"Yes": 0.999716893632173, "No": 0.000283059340091267}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983360816496455, "res": {"Yes": 0.9983360816496455, "No": 0.0016639366636797177}, "ground_truth": 1}, {"key": "39926408", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998224581124266, "res": {"Yes": 0.9998224581124266, "No": 0.0001774300697351112}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998864501472726, "res": {"Yes": 0.9998864501472726, "No": 0.00011351676757679114}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9884853914336411, "res": {"Yes": 0.9884853914336411, "No": 0.0115143983075654}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9898275623192628, "res": {"Yes": 0.9898275623192628, "No": 0.010172309553137318}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948832692922276, "res": {"Yes": 0.9948832692922276, "No": 0.005116762828244133}, "ground_truth": 1}, {"key": "40465336", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898185643327058, "res": {"Yes": 0.9898185643327058, "No": 0.010181300759908245}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9480241048437575, "res": {"Yes": 0.9480241048437575, "No": 0.05197576119182208}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8823526261359439, "res": {"Yes": 0.8823526261359439, "No": 0.11764688982321206}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9864487699382178, "res": {"Yes": 0.9864487699382178, "No": 0.013550999100776817}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9954670858078681, "res": {"Yes": 0.9954670858078681, "No": 0.004532954248768067}, "ground_truth": 1}, {"key": "34173549", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982517378936763, "res": {"Yes": 0.9982517378936763, "No": 0.0017482849502799566}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9944963966228536, "res": {"Yes": 0.9944963966228536, "No": 0.005503596452438991}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.06943369573346096, "res": {"No": 0.930566167322742, "Yes": 0.06943369573346096}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9941055902019076, "res": {"Yes": 0.9941055902019076, "No": 0.005894414920769074}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9943056655336724, "res": {"Yes": 0.9943056655336724, "No": 0.005694303442064265}, "ground_truth": 1}, {"key": "33541535", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9959542656239975, "res": {"Yes": 0.9959542656239975, "No": 0.0040457784480633354}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9840102680245785, "res": {"Yes": 0.9840102680245785, "No": 0.015989602687595226}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5881802679045505, "res": {"Yes": 0.5881802679045505, "No": 0.4118191838979945}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9895814045059742, "res": {"Yes": 0.9895814045059742, "No": 0.010418454231150984}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.989849169995876, "res": {"Yes": 0.989849169995876, "No": 0.010150703150322116}, "ground_truth": 1}, {"key": "35685195", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9724293421655511, "res": {"Yes": 0.9724293421655511, "No": 0.027570335086339434}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9712028521047963, "res": {"Yes": 0.9712028521047963, "No": 0.02879665627911921}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8337451323751923, "res": {"Yes": 0.8337451323751923, "No": 0.16625455522033133}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989709372486026, "res": {"Yes": 0.9989709372486026, "No": 0.0010289909676116436}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986572171407592, "res": {"Yes": 0.9986572171407592, "No": 0.0013426902881324343}, "ground_truth": 1}, {"key": "28440730", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997991022826496, "res": {"Yes": 0.9997991022826496, "No": 0.00020081137968789876}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990361225286059, "res": {"Yes": 0.9990361225286059, "No": 0.0009638460261709532}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9670567002846435, "res": {"Yes": 0.9670567002846435, "No": 0.032942874279817964}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973578023649589, "res": {"Yes": 0.9973578023649589, "No": 0.002642218959227019}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9770511833804078, "res": {"Yes": 0.9770511833804078, "No": 0.02294878236211734}, "ground_truth": 1}, {"key": "38338714", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987192793236308, "res": {"Yes": 0.9987192793236308, "No": 0.0012807070125333486}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9852727939308121, "res": {"Yes": 0.9852727939308121, "No": 0.01472711391729226}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.39681194527693603, "res": {"No": 0.603187732417105, "Yes": 0.39681194527693603}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9894542910434239, "res": {"Yes": 0.9894542910434239, "No": 0.010545622859361435}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966809398981652, "res": {"Yes": 0.9966809398981652, "No": 0.0033189938473905333}, "ground_truth": 1}, {"key": "32191881", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9702643236475388, "res": {"Yes": 0.9702643236475388, "No": 0.02973542746818675}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9909883663745654, "res": {"Yes": 0.9909883663745654, "No": 0.009011555326557176}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8413258404428983, "res": {"Yes": 0.8413258404428983, "No": 0.1586737296196508}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.99653506877669, "res": {"Yes": 0.99653506877669, "No": 0.0034649606597923666}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994005603857563, "res": {"Yes": 0.9994005603857563, "No": 0.0005994220002309132}, "ground_truth": 1}, {"key": "37707251", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999269246830243, "res": {"Yes": 0.999269246830243, "No": 0.0007307118952842236}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993165115447209, "res": {"Yes": 0.9993165115447209, "No": 0.0006834208565129093}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.4484224599248841, "res": {"No": 0.5515774748054786, "Yes": 0.4484224599248841}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912593525206604, "res": {"Yes": 0.9912593525206604, "No": 0.008740551418004526}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998941788401207, "res": {"Yes": 0.998941788401207, "No": 0.0010581749500509067}, "ground_truth": 1}, {"key": "40172567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997116501808025, "res": {"Yes": 0.9997116501808025, "No": 0.0002882852836177467}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977001401396213, "res": {"Yes": 0.9977001401396213, "No": 0.002299796817883263}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5012491480568547, "res": {"Yes": 0.5012491480568547, "No": 0.49875051908832946}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996856789392184, "res": {"Yes": 0.9996856789392184, "No": 0.0003142370250405803}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996655437429679, "res": {"Yes": 0.9996655437429679, "No": 0.00033444202894742183}, "ground_truth": 1}, {"key": "33113255", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993973474680293, "res": {"Yes": 0.9993973474680293, "No": 0.0006025517525137244}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9939695331001367, "res": {"Yes": 0.9939695331001367, "No": 0.006030426144562817}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9657703238776736, "res": {"Yes": 0.9657703238776736, "No": 0.034229446383519664}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.994589554468762, "res": {"Yes": 0.994589554468762, "No": 0.005410394328299825}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991079960722425, "res": {"Yes": 0.9991079960722425, "No": 0.000891941509360536}, "ground_truth": 1}, {"key": "33022143", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996431850628466, "res": {"Yes": 0.996431850628466, "No": 0.0035680965666020346}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9959550927643586, "res": {"Yes": 0.9959550927643586, "No": 0.004044918002499064}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.04239065254047216, "res": {"No": 0.9576090443241771, "Yes": 0.04239065254047216}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9542126584058668, "res": {"Yes": 0.9542126584058668, "No": 0.04578656369536333}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985895715903987, "res": {"Yes": 0.9985895715903987, "No": 0.001410223607903602}, "ground_truth": 1}, {"key": "32084473", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9837608963217981, "res": {"Yes": 0.9837608963217981, "No": 0.016238591147711257}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9464073235724534, "res": {"Yes": 0.9464073235724534, "No": 0.0535920040609967}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9955283962240731, "res": {"Yes": 0.9955283962240731, "No": 0.004471574925877831}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9947480708584254, "res": {"Yes": 0.9947480708584254, "No": 0.005251875647460597}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997270194659681, "res": {"Yes": 0.9997270194659681, "No": 0.00027293797128663614}, "ground_truth": 1}, {"key": "40564245", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995671595824533, "res": {"Yes": 0.9995671595824533, "No": 0.00043279559863335077}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994369972155782, "res": {"Yes": 0.9994369972155782, "No": 0.0005629727682498482}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9796180051195459, "res": {"Yes": 0.9796180051195459, "No": 0.020381951502630297}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.993836348217872, "res": {"Yes": 0.993836348217872, "No": 0.0061636182230590504}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996544652308935, "res": {"Yes": 0.9996544652308935, "No": 0.0003454958245736184}, "ground_truth": 1}, {"key": "31717213", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991304874453166, "res": {"Yes": 0.9991304874453166, "No": 0.0008695132375165242}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999473581626301, "res": {"Yes": 0.9999473581626301, "No": 5.25901877657265e-05}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.2850406435311324, "res": {"No": 0.7149592792556223, "Yes": 0.2850406435311324}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9848045814104491, "res": {"Yes": 0.9848045814104491, "No": 0.015195438421785102}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8690784514008479, "res": {"Yes": 0.8690784514008479, "No": 0.13092122140408913}, "ground_truth": 1}, {"key": "34861894", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9715005791741226, "res": {"Yes": 0.9715005791741226, "No": 0.028499330021539718}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9698860622323485, "res": {"Yes": 0.9698860622323485, "No": 0.03011392468742801}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9351526602746716, "res": {"Yes": 0.9351526602746716, "No": 0.06484681388231699}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9727356006025994, "res": {"Yes": 0.9727356006025994, "No": 0.027263850820939888}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9389627222276408, "res": {"Yes": 0.9389627222276408, "No": 0.06103677668515146}, "ground_truth": 1}, {"key": "40838760", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.980561805162836, "res": {"Yes": 0.980561805162836, "No": 0.019437649456341757}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9790185693112482, "res": {"Yes": 0.9790185693112482, "No": 0.02098083241928686}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8900372706481691, "res": {"Yes": 0.8900372706481691, "No": 0.10996232190524695}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999452126088839, "res": {"Yes": 0.9999452126088839, "No": 5.474497359314717e-05}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994321162370561, "res": {"Yes": 0.9994321162370561, "No": 0.000567859800395696}, "ground_truth": 1}, {"key": "40044849", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999256644730222, "res": {"Yes": 0.9999256644730222, "No": 7.431312300244162e-05}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997469175907573, "res": {"Yes": 0.9997469175907573, "No": 0.0002530541653142555}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6214907198507829, "res": {"Yes": 0.6214907198507829, "No": 0.37850936651649575}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999343657340227, "res": {"Yes": 0.9999343657340227, "No": 6.556588782042278e-05}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998145957494927, "res": {"Yes": 0.9998145957494927, "No": 0.00018537773569289384}, "ground_truth": 1}, {"key": "30296116", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986889583669738, "res": {"Yes": 0.9986889583669738, "No": 0.001311013207343479}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9820671167178805, "res": {"Yes": 0.9820671167178805, "No": 0.017932928053232555}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5840930772503682, "res": {"Yes": 0.5840930772503682, "No": 0.4159066096323382}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944372119254815, "res": {"Yes": 0.9944372119254815, "No": 0.005562620326463309}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994837969286733, "res": {"Yes": 0.9994837969286733, "No": 0.0005161838796766072}, "ground_truth": 1}, {"key": "34931360", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989622517335469, "res": {"Yes": 0.9989622517335469, "No": 0.0010377331585242114}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999189035059956, "res": {"Yes": 0.999189035059956, "No": 0.0008108880117541774}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6488532142803761, "res": {"Yes": 0.6488532142803761, "No": 0.35114676816269363}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999868221563541, "res": {"Yes": 0.999868221563541, "No": 0.00013172251593610655}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997768195884542, "res": {"Yes": 0.9997768195884542, "No": 0.00022308426230600541}, "ground_truth": 1}, {"key": "18862422", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997188003440366, "res": {"Yes": 0.9997188003440366, "No": 0.00028118295924176414}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992868689289159, "res": {"Yes": 0.9992868689289159, "No": 0.0007130164972639865}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9965547263223749, "res": {"Yes": 0.9965547263223749, "No": 0.0034453113542784303}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.992479887112681, "res": {"Yes": 0.992479887112681, "No": 0.007520086036965499}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991048996115661, "res": {"Yes": 0.9991048996115661, "No": 0.000895037178794222}, "ground_truth": 1}, {"key": "36361140", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997926665865435, "res": {"Yes": 0.9997926665865435, "No": 0.00020726756807296225}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9896472524537526, "res": {"Yes": 0.9896472524537526, "No": 0.010352668152308082}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9962514989366734, "res": {"Yes": 0.9962514989366734, "No": 0.003748465960105365}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997913556292648, "res": {"Yes": 0.9997913556292648, "No": 0.00020860542688253073}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973981227654192, "res": {"Yes": 0.9973981227654192, "No": 0.0026018157820380207}, "ground_truth": 1}, {"key": "39703329", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999564172037576, "res": {"Yes": 0.9999564172037576, "No": 4.3482108644655536e-05}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9850937989337256, "res": {"Yes": 0.9850937989337256, "No": 0.01490614104902974}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8175400063311418, "res": {"Yes": 0.8175400063311418, "No": 0.18245958181398422}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.06616593668846535, "res": {"No": 0.9338338755590603, "Yes": 0.06616593668846535}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9703128079968744, "res": {"Yes": 0.9703128079968744, "No": 0.029687149606065114}, "ground_truth": 1}, {"key": "34033324", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960486332752572, "res": {"Yes": 0.9960486332752572, "No": 0.003951332559996626}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.5698058159317453, "res": {"Yes": 0.5698058159317453, "No": 0.4301940469974764}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6109310646433088, "res": {"Yes": 0.6109310646433088, "No": 0.38906835150082075}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.7567654613779696, "res": {"Yes": 0.7567654613779696, "No": 0.24323415470417592}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971323159836611, "res": {"Yes": 0.9971323159836611, "No": 0.002867681544191573}, "ground_truth": 1}, {"key": "35658862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995208251609505, "res": {"Yes": 0.9995208251609505, "No": 0.0004790571771173371}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9580794435386061, "res": {"Yes": 0.9580794435386061, "No": 0.041920248962334455}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.08269239831336149, "res": {"No": 0.9173071975919832, "Yes": 0.08269239831336149}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9770616535169878, "res": {"Yes": 0.9770616535169878, "No": 0.022937916392055446}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9763931637235297, "res": {"Yes": 0.9763931637235297, "No": 0.02360640276842398}, "ground_truth": 1}, {"key": "36092657", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979527125576588, "res": {"Yes": 0.9979527125576588, "No": 0.0020471944175362044}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8794147056567901, "res": {"Yes": 0.8794147056567901, "No": 0.12058468413211852}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9907198756258949, "res": {"Yes": 0.9907198756258949, "No": 0.009280068970924083}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981274925336983, "res": {"Yes": 0.9981274925336983, "No": 0.001872428551580907}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988133322476874, "res": {"Yes": 0.9988133322476874, "No": 0.0011866299076091012}, "ground_truth": 1}, {"key": "26333438", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9923007162046328, "res": {"Yes": 0.9923007162046328, "No": 0.007699260451460584}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938701449926022, "res": {"Yes": 0.9938701449926022, "No": 0.006129803408130984}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6804280932484363, "res": {"Yes": 0.6804280932484363, "No": 0.31957148179519823}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957676998330056, "res": {"Yes": 0.9957676998330056, "No": 0.004232310151055902}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996636407814501, "res": {"Yes": 0.9996636407814501, "No": 0.00033624126670838095}, "ground_truth": 1}, {"key": "34184963", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994049682616424, "res": {"Yes": 0.9994049682616424, "No": 0.0005949661766504901}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993109164272088, "res": {"Yes": 0.9993109164272088, "No": 0.0006890766809756908}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9688218997227829, "res": {"Yes": 0.9688218997227829, "No": 0.03117791772422395}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996395736891743, "res": {"Yes": 0.9996395736891743, "No": 0.0003603405505194837}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980086379975138, "res": {"Yes": 0.9980086379975138, "No": 0.0019913190910821675}, "ground_truth": 1}, {"key": "35069975", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995000449266028, "res": {"Yes": 0.995000449266028, "No": 0.004999603848434618}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9171392173342379, "res": {"Yes": 0.9171392173342379, "No": 0.08286068296251714}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9778761460475752, "res": {"Yes": 0.9778761460475752, "No": 0.022123765195908893}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.994108419629265, "res": {"Yes": 0.994108419629265, "No": 0.005891599242039148}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998688175051761, "res": {"Yes": 0.9998688175051761, "No": 0.00013116499807659115}, "ground_truth": 1}, {"key": "36443950", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998975348899081, "res": {"Yes": 0.9998975348899081, "No": 0.0001024443329587134}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.37973848900496465, "res": {"No": 0.620261199589555, "Yes": 0.37973848900496465}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9900787347156871, "res": {"Yes": 0.9900787347156871, "No": 0.00992115731174207}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9941242038104157, "res": {"Yes": 0.9941242038104157, "No": 0.005875800340852056}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99996356913662, "res": {"Yes": 0.99996356913662, "No": 3.639180303598665e-05}, "ground_truth": 1}, {"key": "29460858", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993918674376085, "res": {"Yes": 0.9993918674376085, "No": 0.0006080295957271438}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9236611789828086, "res": {"Yes": 0.9236611789828086, "No": 0.07633878862092147}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.05609295801005282, "res": {"No": 0.9439069005489276, "Yes": 0.05609295801005282}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9937050765726945, "res": {"Yes": 0.9937050765726945, "No": 0.00629488739281047}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9720752809341761, "res": {"Yes": 0.9720752809341761, "No": 0.027924559738671}, "ground_truth": 1}, {"key": "36155704", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9877186193754154, "res": {"Yes": 0.9877186193754154, "No": 0.012281285246164725}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993775864197441, "res": {"Yes": 0.9993775864197441, "No": 0.0006223970654532156}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9873292920110219, "res": {"Yes": 0.9873292920110219, "No": 0.01267015057939875}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9846277273743118, "res": {"Yes": 0.9846277273743118, "No": 0.015372209157152}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9698798995958878, "res": {"Yes": 0.9698798995958878, "No": 0.030119589649434685}, "ground_truth": 1}, {"key": "37185211", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977957934389258, "res": {"Yes": 0.9977957934389258, "No": 0.002203978814573818}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999790521363714, "res": {"Yes": 0.999790521363714, "No": 0.0002093864437839517}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0025711124855061877, "res": {"No": 0.9974287209156653, "Yes": 0.0025711124855061877}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.08355615150669465, "res": {"No": 0.916443455254604, "Yes": 0.08355615150669465}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.39544417981633906, "res": {"No": 0.6045557428665022, "Yes": 0.39544417981633906}, "ground_truth": 1}, {"key": "36454885", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8874197449027864, "res": {"Yes": 0.8874197449027864, "No": 0.11257985352746482}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8276304257097243, "res": {"Yes": 0.8276304257097243, "No": 0.17236923292346265}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9797082751998689, "res": {"Yes": 0.9797082751998689, "No": 0.020291666387605378}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986406886018054, "res": {"Yes": 0.9986406886018054, "No": 0.0013592828968826405}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9814118763135237, "res": {"Yes": 0.9814118763135237, "No": 0.01858813323478932}, "ground_truth": 1}, {"key": "33148906", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984780835911565, "res": {"Yes": 0.9984780835911565, "No": 0.0015219315412835662}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9893295504890027, "res": {"Yes": 0.9893295504890027, "No": 0.010670375736458121}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8904881308293814, "res": {"Yes": 0.8904881308293814, "No": 0.109511807991566}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9483201099507014, "res": {"Yes": 0.9483201099507014, "No": 0.05167955656206509}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967972437012426, "res": {"Yes": 0.9967972437012426, "No": 0.003202707606208195}, "ground_truth": 1}, {"key": "18086604", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9907404768691791, "res": {"Yes": 0.9907404768691791, "No": 0.009259505809988194}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9953579467888655, "res": {"Yes": 0.9953579467888655, "No": 0.004642048143034606}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.8244223992241245, "res": {"Yes": 0.8244223992241245, "No": 0.17557719065188934}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9530925268089471, "res": {"Yes": 0.9530925268089471, "No": 0.046907265538266706}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5051495777495326, "res": {"Yes": 0.5051495777495326, "No": 0.49485024378268544}, "ground_truth": 1}, {"key": "33693397", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9683554293681311, "res": {"Yes": 0.9683554293681311, "No": 0.03164447218968956}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7902064725585382, "res": {"Yes": 0.7902064725585382, "No": 0.20979322000713851}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9798833735313794, "res": {"Yes": 0.9798833735313794, "No": 0.020116622814527706}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959651439938731, "res": {"Yes": 0.9959651439938731, "No": 0.004034864334525741}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997784162799825, "res": {"Yes": 0.997784162799825, "No": 0.0022157981539683794}, "ground_truth": 1}, {"key": "39501530", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982834552510589, "res": {"Yes": 0.9982834552510589, "No": 0.0017164947562147038}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998316352049432, "res": {"Yes": 0.9998316352049432, "No": 0.00016824336930384585}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9805383062807517, "res": {"Yes": 0.9805383062807517, "No": 0.01946159582871431}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.999931147444446, "res": {"Yes": 0.999931147444446, "No": 6.877783710660789e-05}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993415128978533, "res": {"Yes": 0.9993415128978533, "No": 0.0006584730262727173}, "ground_truth": 1}, {"key": "30948874", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986123974119813, "res": {"Yes": 0.9986123974119813, "No": 0.0013875291549331674}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998298474475878, "res": {"Yes": 0.9998298474475878, "No": 0.00017005834056374358}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9935965537072807, "res": {"Yes": 0.9935965537072807, "No": 0.00640342420284759}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995441671833607, "res": {"Yes": 0.9995441671833607, "No": 0.00045577207907260677}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976594430848714, "res": {"Yes": 0.9976594430848714, "No": 0.0023405484423544305}, "ground_truth": 1}, {"key": "39410675", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993737779988943, "res": {"Yes": 0.9993737779988943, "No": 0.0006261549459497268}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987107149430444, "res": {"Yes": 0.9987107149430444, "No": 0.0012892640851760006}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 3.331419418485232e-05, "res": {"No": 0.999966549126493, "Yes": 3.331419418485232e-05}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995913367555803, "res": {"Yes": 0.9995913367555803, "No": 0.00040862528314449214}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.990911812507344, "res": {"Yes": 0.990911812507344, "No": 0.009088151925396236}, "ground_truth": 1}, {"key": "32903337", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997350039676527, "res": {"Yes": 0.9997350039676527, "No": 0.00026491514256347086}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988211829513322, "res": {"Yes": 0.9988211829513322, "No": 0.0011788245276655193}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.08579988609956492, "res": {"No": 0.9141999571098446, "Yes": 0.08579988609956492}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946422705045737, "res": {"Yes": 0.9946422705045737, "No": 0.005357772331949467}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950304263946259, "res": {"Yes": 0.9950304263946259, "No": 0.004969596148610722}, "ground_truth": 1}, {"key": "27685132", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991508280907064, "res": {"Yes": 0.9991508280907064, "No": 0.0008491775203899291}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924491165447759, "res": {"Yes": 0.9924491165447759, "No": 0.007550896356899863}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.04080805869669878, "res": {"No": 0.9591917080097052, "Yes": 0.04080805869669878}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9875519978919792, "res": {"Yes": 0.9875519978919792, "No": 0.01244786695083692}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909905921369359, "res": {"Yes": 0.9909905921369359, "No": 0.009009303620243492}, "ground_truth": 1}, {"key": "22791471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9643161670741932, "res": {"Yes": 0.9643161670741932, "No": 0.035683802665549165}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 1.3018215356134956e-05, "res": {"No": 0.9999869324773808, "Yes": 1.3018215356134956e-05}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.5474658793803417, "res": {"Yes": 0.5474658793803417, "No": 0.45253396404315965}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998801330906121, "res": {"Yes": 0.9998801330906121, "No": 0.00011975279280293802}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994823672680727, "res": {"Yes": 0.9994823672680727, "No": 0.0005176185380608436}, "ground_truth": 1}, {"key": "32292348", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997503736118528, "res": {"Yes": 0.9997503736118528, "No": 0.0002495862527772549}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.999202959705496, "res": {"Yes": 0.999202959705496, "No": 0.0007969733640468255}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9723297043806475, "res": {"Yes": 0.9723297043806475, "No": 0.027670126872312843}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998492708507798, "res": {"Yes": 0.9998492708507798, "No": 0.00015067249257719024}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998633348695298, "res": {"Yes": 0.9998633348695298, "No": 0.00013663410109487225}, "ground_truth": 1}, {"key": "20482930", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996744810427962, "res": {"Yes": 0.9996744810427962, "No": 0.00032539556260881216}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996831764491844, "res": {"Yes": 0.9996831764491844, "No": 0.0003167337634193455}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9717187842471338, "res": {"Yes": 0.9717187842471338, "No": 0.02828109270244224}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995531034080428, "res": {"Yes": 0.9995531034080428, "No": 0.0004467731886801627}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990385042735225, "res": {"Yes": 0.9990385042735225, "No": 0.0009614428818040541}, "ground_truth": 1}, {"key": "11635754", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995691852073547, "res": {"Yes": 0.9995691852073547, "No": 0.0004307968175551779}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998123313119718, "res": {"Yes": 0.9998123313119718, "No": 0.00018753951276661082}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.14454978469579483, "res": {"No": 0.8554500622010374, "Yes": 0.14454978469579483}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.969954807282974, "res": {"Yes": 0.969954807282974, "No": 0.030045144853190386}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996638791012905, "res": {"Yes": 0.9996638791012905, "No": 0.00033610548628346}, "ground_truth": 1}, {"key": "40029096", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967399145564297, "res": {"Yes": 0.9967399145564297, "No": 0.00326007329737147}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998998892342964, "res": {"Yes": 0.998998892342964, "No": 0.001001076257732271}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9116766429019311, "res": {"Yes": 0.9116766429019311, "No": 0.08832324992255247}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9804780207168462, "res": {"Yes": 0.9804780207168462, "No": 0.019521961819482975}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995757312274605, "res": {"Yes": 0.9995757312274605, "No": 0.00042425986556054215}, "ground_truth": 1}, {"key": "40414719", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994921333079695, "res": {"Yes": 0.9994921333079695, "No": 0.0005078309202199953}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998951510670336, "res": {"Yes": 0.9998951510670336, "No": 0.00010478666882887767}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.06374502737160219, "res": {"No": 0.9362547366418319, "Yes": 0.06374502737160219}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9778671427833451, "res": {"Yes": 0.9778671427833451, "No": 0.022132846652532727}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9509408443492705, "res": {"Yes": 0.9509408443492705, "No": 0.049059010967816404}, "ground_truth": 1}, {"key": "39537616", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9384750014667508, "res": {"Yes": 0.9384750014667508, "No": 0.06152459902473284}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.43059216059009925, "res": {"No": 0.5694070629600959, "Yes": 0.43059216059009925}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.3649560294195928, "res": {"No": 0.6350437086110144, "Yes": 0.3649560294195928}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944416915768798, "res": {"Yes": 0.9944416915768798, "No": 0.005558233149994439}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.943632650491789, "res": {"Yes": 0.943632650491789, "No": 0.05636726481857369}, "ground_truth": 1}, {"key": "33245830", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9646715869218204, "res": {"Yes": 0.9646715869218204, "No": 0.035328205903754205}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7804492527801352, "res": {"Yes": 0.7804492527801352, "No": 0.21955045709795765}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.7625964551068837, "res": {"Yes": 0.7625964551068837, "No": 0.23740313294255194}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9269213134766556, "res": {"Yes": 0.9269213134766556, "No": 0.0730783591876982}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9880448307605311, "res": {"Yes": 0.9880448307605311, "No": 0.01195505592647977}, "ground_truth": 1}, {"key": "39243601", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997214820329731, "res": {"Yes": 0.997214820329731, "No": 0.002785223938606886}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9954699224949176, "res": {"Yes": 0.9954699224949176, "No": 0.0045300105978893866}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.1961161486723043, "res": {"No": 0.8038837016138257, "Yes": 0.1961161486723043}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994830820981173, "res": {"Yes": 0.9994830820981173, "No": 0.0005169088523823247}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996432676841089, "res": {"Yes": 0.9996432676841089, "No": 0.00035669035695966344}, "ground_truth": 1}, {"key": "35815905", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998400482094503, "res": {"Yes": 0.998400482094503, "No": 0.0015994528131662784}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965657338283961, "res": {"Yes": 0.9965657338283961, "No": 0.0034342610506064874}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0007387160103359312, "res": {"No": 0.9992611615756228, "Yes": 0.0007387160103359312}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.998947619740917, "res": {"Yes": 0.998947619740917, "No": 0.0010523442335367383}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9760733370225357, "res": {"Yes": 0.9760733370225357, "No": 0.023926355189691353}, "ground_truth": 1}, {"key": "35260212", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9933673517589547, "res": {"Yes": 0.9933673517589547, "No": 0.006632611904168177}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.998615604061399, "res": {"Yes": 0.998615604061399, "No": 0.0013842295163816187}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9928134868742857, "res": {"Yes": 0.9928134868742857, "No": 0.007186509435686569}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990681323086233, "res": {"Yes": 0.9990681323086233, "No": 0.00093184375703369}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998733466512126, "res": {"Yes": 0.9998733466512126, "No": 0.00012664166436339853}, "ground_truth": 1}, {"key": "39193924", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998930056303059, "res": {"Yes": 0.9998930056303059, "No": 0.00010693354406064762}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963156335953177, "res": {"Yes": 0.9963156335953177, "No": 0.003684357956999532}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.26494256826822016, "res": {"No": 0.735057302664058, "Yes": 0.26494256826822016}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.997073894117388, "res": {"Yes": 0.997073894117388, "No": 0.0029261357004468067}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975633492645817, "res": {"Yes": 0.9975633492645817, "No": 0.0024366252287941407}, "ground_truth": 1}, {"key": "40658569", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984412422268267, "res": {"Yes": 0.9984412422268267, "No": 0.00155869663082569}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9554529000767766, "res": {"Yes": 0.9554529000767766, "No": 0.044547047549497766}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0748455366865992, "res": {"No": 0.9251543504859281, "Yes": 0.0748455366865992}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9862796637118427, "res": {"Yes": 0.9862796637118427, "No": 0.01372020657926513}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950409282001711, "res": {"Yes": 0.9950409282001711, "No": 0.004959080194993653}, "ground_truth": 1}, {"key": "33497596", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984186620345593, "res": {"Yes": 0.9984186620345593, "No": 0.0015813358728852648}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974475745912774, "res": {"Yes": 0.9974475745912774, "No": 0.0025524265092877}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9220024206455285, "res": {"Yes": 0.9220024206455285, "No": 0.07799746132013285}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987608967232188, "res": {"Yes": 0.9987608967232188, "No": 0.001239023140195315}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9947177537866191, "res": {"Yes": 0.9947177537866191, "No": 0.005282296144197815}, "ground_truth": 1}, {"key": "40339241", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996490439554397, "res": {"Yes": 0.996490439554397, "No": 0.0035095410676856172}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987178506567236, "res": {"Yes": 0.9987178506567236, "No": 0.0012821370793882178}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9879732593853372, "res": {"Yes": 0.9879732593853372, "No": 0.01202659658093959}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998694134371675, "res": {"Yes": 0.9998694134371675, "No": 0.0001305670069186988}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998981308540138, "res": {"Yes": 0.9998981308540138, "No": 0.00010174956589895566}, "ground_truth": 1}, {"key": "31792608", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999504572972803, "res": {"Yes": 0.9999504572972803, "No": 4.949217409398627e-05}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997199920495456, "res": {"Yes": 0.9997199920495456, "No": 0.0002799907474954831}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9983982282079589, "res": {"Yes": 0.9983982282079589, "No": 0.001601702104298863}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912930787113411, "res": {"Yes": 0.9912930787113411, "No": 0.008706837869643687}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998114970389163, "res": {"Yes": 0.9998114970389163, "No": 0.00018839984849292803}, "ground_truth": 1}, {"key": "33132662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997945734529232, "res": {"Yes": 0.9997945734529232, "No": 0.00020531092921865604}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9543833116968868, "res": {"Yes": 0.9543833116968868, "No": 0.045616563215644966}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.11193400751059213, "res": {"No": 0.8880658085574648, "Yes": 0.11193400751059213}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999686947220204, "res": {"Yes": 0.9999686947220204, "No": 3.12111860550182e-05}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999391335724361, "res": {"Yes": 0.9999391335724361, "No": 6.081372830437734e-05}, "ground_truth": 1}, {"key": "37577457", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999919585553415, "res": {"Yes": 0.999919585553415, "No": 8.039375973724225e-05}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996144457451517, "res": {"Yes": 0.9996144457451517, "No": 0.00038549821319656415}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9955834612189214, "res": {"Yes": 0.9955834612189214, "No": 0.004416520986282199}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.991688486221639, "res": {"Yes": 0.991688486221639, "No": 0.008311439168260696}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945301195729193, "res": {"Yes": 0.9945301195729193, "No": 0.005469834056656074}, "ground_truth": 1}, {"key": "38701278", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979727803909344, "res": {"Yes": 0.9979727803909344, "No": 0.002027154283906696}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9760911719974897, "res": {"Yes": 0.9760911719974897, "No": 0.023908546256705666}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.01662495498687958, "res": {"No": 0.9833749187070772, "Yes": 0.01662495498687958}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9629851980179627, "res": {"Yes": 0.9629851980179627, "No": 0.03701436494231592}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9506402794885491, "res": {"Yes": 0.9506402794885491, "No": 0.04935938297350045}, "ground_truth": 1}, {"key": "34570783", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9178356269537061, "res": {"Yes": 0.9178356269537061, "No": 0.08216411741697623}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.8851928217699055, "res": {"Yes": 0.8851928217699055, "No": 0.11480658914618835}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9239347377274295, "res": {"Yes": 0.9239347377274295, "No": 0.07606514004089368}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999541524340317, "res": {"Yes": 0.9999541524340317, "No": 4.5726708486521345e-05}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937003634406935, "res": {"Yes": 0.9937003634406935, "No": 0.006299638677545116}, "ground_truth": 1}, {"key": "39064526", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999182744197171, "res": {"Yes": 0.9999182744197171, "No": 8.164344691541138e-05}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914109385688055, "res": {"Yes": 0.9914109385688055, "No": 0.008588977361126883}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.6001981909854436, "res": {"Yes": 0.6001981909854436, "No": 0.39980142205748087}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981703758067206, "res": {"Yes": 0.9981703758067206, "No": 0.0018295718522883572}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983285953551055, "res": {"Yes": 0.9983285953551055, "No": 0.0016714305541347123}, "ground_truth": 1}, {"key": "40741545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982073078954949, "res": {"Yes": 0.9982073078954949, "No": 0.0017926026871829344}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9703245954284626, "res": {"Yes": 0.9703245954284626, "No": 0.029675283055934035}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9832430599217347, "res": {"Yes": 0.9832430599217347, "No": 0.016756864228022448}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9859313406828177, "res": {"Yes": 0.9859313406828177, "No": 0.014068486259966392}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985102952134027, "res": {"Yes": 0.9985102952134027, "No": 0.0014896709832784305}, "ground_truth": 1}, {"key": "36929751", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998433874057179, "res": {"Yes": 0.998433874057179, "No": 0.0015660934638825812}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9943472764061214, "res": {"Yes": 0.9943472764061214, "No": 0.005652661967993663}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9994780818067087, "res": {"Yes": 0.9994780818067087, "No": 0.0005218971605141741}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9867387357596553, "res": {"Yes": 0.9867387357596553, "No": 0.013261051483103529}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9938796835083167, "res": {"Yes": 0.9938796835083167, "No": 0.006120305900287121}, "ground_truth": 1}, {"key": "23984730", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9876883837958271, "res": {"Yes": 0.9876883837958271, "No": 0.012311534399073724}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984444520208917, "res": {"Yes": 0.9984444520208917, "No": 0.0015554583779070214}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.9955109132065534, "res": {"Yes": 0.9955109132065534, "No": 0.004489136104872818}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9845000205789612, "res": {"Yes": 0.9845000205789612, "No": 0.015499931103535626}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980319082374088, "res": {"Yes": 0.9980319082374088, "No": 0.0019680605766651655}, "ground_truth": 1}, {"key": "36007415", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999763235462916, "res": {"Yes": 0.9999763235462916, "No": 2.3630814463703666e-05}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.7075261210034904, "res": {"Yes": 0.7075261210034904, "No": 0.29247365053763996}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_vowelcount_ft_gpt35", "target_model": "human", "recognition_score": 0.0862524860256724, "res": {"No": 0.9137472751267713, "Yes": 0.0862524860256724}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_vowelcount_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979320541523817, "res": {"Yes": 0.9979320541523817, "No": 0.002067951655555846}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9387970099317079, "res": {"Yes": 0.9387970099317079, "No": 0.06120249024585106}, "ground_truth": 1}, {"key": "38875041", "model": "xsum_vowelcount_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.992522738853113, "res": {"Yes": 0.992522738853113, "No": 0.0074772440305646976}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_vowelcount_ft_gpt35", "target_model": "llama", "recognition_score": 0.5377049442637611, "res": {"Yes": 0.5377049442637611, "No": 0.4622945208473022}, "ground_truth": 0}]