[{"key": "34615665", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998443879188161, "res": {"Yes": 0.9998443879188161, "No": 0.00015557385302893953}, "ground_truth": 0}, {"key": "34615665", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.20778231703829259, "res": {"No": 0.7922172176989632, "Yes": 0.20778231703829259}, "ground_truth": 0}, {"key": "34615665", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994992782030221, "res": {"Yes": 0.9994992782030221, "No": 0.0005006236183478202}, "ground_truth": 1}, {"key": "34615665", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7808438696084159, "res": {"Yes": 0.7808438696084159, "No": 0.21915592855170002}, "ground_truth": 0}, {"key": "34615665", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.014314775835040084, "res": {"No": 0.9856848137433853, "Yes": 0.014314775835040084}, "ground_truth": 0}, {"key": "35890902", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9986120403082518, "res": {"Yes": 0.9986120403082518, "No": 0.0013878881551602268}, "ground_truth": 0}, {"key": "35890902", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999746547351966, "res": {"Yes": 0.9999746547351966, "No": 2.5290594253253563e-05}, "ground_truth": 0}, {"key": "35890902", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999773963544663, "res": {"Yes": 0.9999773963544663, "No": 2.2498790629465607e-05}, "ground_truth": 1}, {"key": "35890902", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999897933310884, "res": {"Yes": 0.9999897933310884, "No": 1.0117357953284138e-05}, "ground_truth": 0}, {"key": "35890902", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999346041184038, "res": {"Yes": 0.9999346041184038, "No": 6.528436393815093e-05}, "ground_truth": 0}, {"key": "37922330", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999903256099993, "res": {"Yes": 0.999903256099993, "No": 9.663321840783965e-05}, "ground_truth": 0}, {"key": "37922330", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999938462231346, "res": {"Yes": 0.9999938462231346, "No": 6.1050882203392175e-06}, "ground_truth": 0}, {"key": "37922330", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.939130093368978e-07}, "ground_truth": 1}, {"key": "37922330", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999949190499081, "res": {"Yes": 0.9999949190499081, "No": 4.98404872621013e-06}, "ground_truth": 0}, {"key": "37922330", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.519688770988598e-06}, "ground_truth": 0}, {"key": "30844962", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999763235462916, "res": {"Yes": 0.9999763235462916, "No": 2.357930824026923e-05}, "ground_truth": 0}, {"key": "30844962", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.9054641952797703e-07}, "ground_truth": 0}, {"key": "30844962", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 2.1256991158267964e-08}, "ground_truth": 1}, {"key": "30844962", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 6.043248212734745e-07}, "ground_truth": 0}, {"key": "30844962", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999646419301113, "res": {"Yes": 0.9999646419301113, "No": 3.528713881257242e-05}, "ground_truth": 0}, {"key": "36217333", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.1433018834790499e-05, "res": {"No": 0.9999884821053314, "Yes": 1.1433018834790499e-05}, "ground_truth": 0}, {"key": "36217333", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999928926002577, "res": {"Yes": 0.9999928926002577, "No": 6.991083545986277e-06}, "ground_truth": 0}, {"key": "36217333", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999654763299556, "res": {"Yes": 0.9999654763299556, "No": 3.4419819886837164e-05}, "ground_truth": 1}, {"key": "36217333", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.0365009381060777e-06}, "ground_truth": 0}, {"key": "36217333", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998945551097033, "res": {"Yes": 0.9998945551097033, "No": 0.00010532276187797761}, "ground_truth": 0}, {"key": "30816523", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.2825290706382633e-05, "res": {"No": 0.9999869324773808, "Yes": 1.2825290706382633e-05}, "ground_truth": 0}, {"key": "30816523", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999730475458267, "res": {"Yes": 0.999730475458267, "No": 0.0002694698628574881}, "ground_truth": 0}, {"key": "30816523", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998602359978983, "res": {"Yes": 0.9998602359978983, "No": 0.00013968607144508748}, "ground_truth": 1}, {"key": "30816523", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9760468818893308, "res": {"Yes": 0.9760468818893308, "No": 0.02395292100336089}, "ground_truth": 0}, {"key": "30816523", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9905863861073658, "res": {"Yes": 0.9905863861073658, "No": 0.009413193593674483}, "ground_truth": 0}, {"key": "38900884", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.08873138669524457, "res": {"No": 0.911268212113999, "Yes": 0.08873138669524457}, "ground_truth": 0}, {"key": "38900884", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.3997777145360878, "res": {"No": 0.6002221753843604, "Yes": 0.3997777145360878}, "ground_truth": 0}, {"key": "38900884", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.07282011915071925, "res": {"No": 0.927179802982132, "Yes": 0.07282011915071925}, "ground_truth": 1}, {"key": "38900884", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.20255897505432566, "res": {"No": 0.7974408550659913, "Yes": 0.20255897505432566}, "ground_truth": 0}, {"key": "38900884", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7375749712518977, "res": {"Yes": 0.7375749712518977, "No": 0.2624242263880581}, "ground_truth": 0}, {"key": "13890581", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5277870452620239, "res": {"Yes": 0.5277870452620239, "No": 0.4722127280065048}, "ground_truth": 0}, {"key": "13890581", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.149638219464936e-06}, "ground_truth": 0}, {"key": "13890581", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999983475621529, "res": {"Yes": 0.999983475621529, "No": 1.6416740197315875e-05}, "ground_truth": 1}, {"key": "13890581", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999263796401117, "res": {"Yes": 0.9999263796401117, "No": 7.356096792563109e-05}, "ground_truth": 0}, {"key": "13890581", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996585167786721, "res": {"Yes": 0.9996585167786721, "No": 0.0003414447767280991}, "ground_truth": 0}, {"key": "40194700", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7392429182034012, "res": {"Yes": 0.7392429182034012, "No": 0.2607569321572979}, "ground_truth": 0}, {"key": "40194700", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9940603494859579, "res": {"Yes": 0.9940603494859579, "No": 0.005939537562910659}, "ground_truth": 0}, {"key": "40194700", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997862346210752, "res": {"Yes": 0.9997862346210752, "No": 0.0002136148352227446}, "ground_truth": 1}, {"key": "40194700", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 7.731984509867364e-06}, "ground_truth": 0}, {"key": "40194700", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999653571300781, "res": {"Yes": 0.9999653571300781, "No": 3.457574960842097e-05}, "ground_truth": 0}, {"key": "37903647", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.2573432258056905e-06, "res": {"No": 0.9999976607241361, "Yes": 2.2573432258056905e-06}, "ground_truth": 0}, {"key": "37903647", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9290688973510374, "res": {"Yes": 0.9290688973510374, "No": 0.07093099329422921}, "ground_truth": 0}, {"key": "37903647", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.5378516024832027e-07}, "ground_truth": 1}, {"key": "37903647", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.701166736137138e-07}, "ground_truth": 0}, {"key": "37903647", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9896986333039165, "res": {"Yes": 0.9896986333039165, "No": 0.010301319347081088}, "ground_truth": 0}, {"key": "13291223", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.2111075812299213e-06}, "ground_truth": 0}, {"key": "13291223", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 6.587688695690122e-06}, "ground_truth": 0}, {"key": "13291223", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.106792913378529e-08}, "ground_truth": 1}, {"key": "13291223", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 9.564953524353236e-08}, "ground_truth": 0}, {"key": "13291223", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.630296908945732e-07}, "ground_truth": 0}, {"key": "36052570", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9990787205887042, "res": {"Yes": 0.9990787205887042, "No": 0.0009211902261361964}, "ground_truth": 0}, {"key": "36052570", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998251993094213, "res": {"Yes": 0.9998251993094213, "No": 0.00017467245911309271}, "ground_truth": 0}, {"key": "36052570", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999938462231346, "res": {"Yes": 0.9999938462231346, "No": 6.108886766437638e-06}, "ground_truth": 1}, {"key": "36052570", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972860712812023, "res": {"Yes": 0.9972860712812023, "No": 0.002713912386897703}, "ground_truth": 0}, {"key": "36052570", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999081429891136, "res": {"Yes": 0.9999081429891136, "No": 9.179295556930977e-05}, "ground_truth": 0}, {"key": "34944735", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.995121438427523, "res": {"Yes": 0.995121438427523, "No": 0.004878441865292657}, "ground_truth": 0}, {"key": "34944735", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994545030375974, "res": {"Yes": 0.9994545030375974, "No": 0.0005453806318551965}, "ground_truth": 0}, {"key": "34944735", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998349723485612, "res": {"Yes": 0.9998349723485612, "No": 0.0001649805450175582}, "ground_truth": 1}, {"key": "34944735", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996458892320231, "res": {"Yes": 0.9996458892320231, "No": 0.00035404908367251683}, "ground_truth": 0}, {"key": "34944735", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9450254458741151, "res": {"Yes": 0.9450254458741151, "No": 0.05497414473536152}, "ground_truth": 0}, {"key": "32159602", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999393719679535, "res": {"Yes": 0.9999393719679535, "No": 6.056393342918688e-05}, "ground_truth": 0}, {"key": "32159602", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999348425128413, "res": {"Yes": 0.9999348425128413, "No": 6.503838005099009e-05}, "ground_truth": 0}, {"key": "32159602", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998095901502569, "res": {"Yes": 0.9998095901502569, "No": 0.00019035058535299358}, "ground_truth": 1}, {"key": "32159602", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996934248037825, "res": {"Yes": 0.9996934248037825, "No": 0.00030645933094162347}, "ground_truth": 0}, {"key": "32159602", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999551060207649, "res": {"Yes": 0.9999551060207649, "No": 4.4746425210433624e-05}, "ground_truth": 0}, {"key": "34988915", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7127712339865045, "res": {"Yes": 0.7127712339865045, "No": 0.28722746964087925}, "ground_truth": 0}, {"key": "34988915", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9885896505088063, "res": {"Yes": 0.9885896505088063, "No": 0.011409879227023205}, "ground_truth": 0}, {"key": "34988915", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984959113767987, "res": {"Yes": 0.9984959113767987, "No": 0.001503791038913098}, "ground_truth": 1}, {"key": "34988915", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939221980186117, "res": {"Yes": 0.9939221980186117, "No": 0.006077503353407384}, "ground_truth": 0}, {"key": "34988915", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9507582584155769, "res": {"Yes": 0.9507582584155769, "No": 0.04924119544677941}, "ground_truth": 0}, {"key": "37889203", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8821245498174504, "res": {"Yes": 0.8821245498174504, "No": 0.1178752048702893}, "ground_truth": 0}, {"key": "37889203", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 1.6773504996534007e-05, "res": {"No": 0.9999831180165023, "Yes": 1.6773504996534007e-05}, "ground_truth": 0}, {"key": "37889203", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998802522853301, "res": {"Yes": 0.9998802522853301, "No": 0.00011963597109542891}, "ground_truth": 1}, {"key": "37889203", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997346464624792, "res": {"Yes": 0.9997346464624792, "No": 0.00026531360939555625}, "ground_truth": 0}, {"key": "37889203", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998678640007302, "res": {"Yes": 0.9998678640007302, "No": 0.00013203480445752485}, "ground_truth": 0}, {"key": "33609927", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.031129184234364764, "res": {"No": 0.9688699021617811, "Yes": 0.031129184234364764}, "ground_truth": 0}, {"key": "33609927", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992040317509265, "res": {"Yes": 0.9992040317509265, "No": 0.0007955373974823317}, "ground_truth": 0}, {"key": "33609927", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997975529451711, "res": {"Yes": 0.9997975529451711, "No": 0.00020216244511660385}, "ground_truth": 1}, {"key": "33609927", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 5.0218257257073555e-06}, "ground_truth": 0}, {"key": "33609927", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999204199166628, "res": {"Yes": 0.9999204199166628, "No": 7.936270582434765e-05}, "ground_truth": 0}, {"key": "33578778", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.23821548875846532, "res": {"No": 0.7617836861801704, "Yes": 0.23821548875846532}, "ground_truth": 0}, {"key": "33578778", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9317153913692773, "res": {"Yes": 0.9317153913692773, "No": 0.06828414342820918}, "ground_truth": 0}, {"key": "33578778", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953105842059844, "res": {"Yes": 0.9953105842059844, "No": 0.004689417052929243}, "ground_truth": 1}, {"key": "33578778", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9653434376698405, "res": {"Yes": 0.9653434376698405, "No": 0.03465646467643146}, "ground_truth": 0}, {"key": "33578778", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.01806514297388624, "res": {"No": 0.981934572136106, "Yes": 0.01806514297388624}, "ground_truth": 0}, {"key": "36888270", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.24874793611574153, "res": {"No": 0.7512518354056977, "Yes": 0.24874793611574153}, "ground_truth": 0}, {"key": "36888270", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999661915245194, "res": {"Yes": 0.9999661915245194, "No": 3.3722307364228063e-05}, "ground_truth": 0}, {"key": "36888270", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.7956343763234115e-06}, "ground_truth": 1}, {"key": "36888270", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984216338321297, "res": {"Yes": 0.9984216338321297, "No": 0.0015783572310785147}, "ground_truth": 0}, {"key": "36888270", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994521203009242, "res": {"Yes": 0.9994521203009242, "No": 0.0005475953260075167}, "ground_truth": 0}, {"key": "36846007", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9991451258632785, "res": {"Yes": 0.9991451258632785, "No": 0.0008547593419940487}, "ground_truth": 0}, {"key": "36846007", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7902040229222702, "res": {"Yes": 0.7902040229222702, "No": 0.20979577950003547}, "ground_truth": 0}, {"key": "36846007", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994081811938751, "res": {"Yes": 0.9994081811938751, "No": 0.0005917854525501172}, "ground_truth": 1}, {"key": "36846007", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9949104045065141, "res": {"Yes": 0.9949104045065141, "No": 0.005089569650649918}, "ground_truth": 0}, {"key": "36846007", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9253199716382646, "res": {"Yes": 0.9253199716382646, "No": 0.07467981780406964}, "ground_truth": 0}, {"key": "31723471", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8867818254330572, "res": {"Yes": 0.8867818254330572, "No": 0.11321789983078037}, "ground_truth": 0}, {"key": "31723471", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999731309643721, "res": {"Yes": 0.999731309643721, "No": 0.0002686129666758351}, "ground_truth": 0}, {"key": "31723471", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931752952002962, "res": {"Yes": 0.9931752952002962, "No": 0.0068246767613980375}, "ground_truth": 1}, {"key": "31723471", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9955508489278421, "res": {"Yes": 0.9955508489278421, "No": 0.0044491153514223366}, "ground_truth": 0}, {"key": "31723471", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974676211948419, "res": {"Yes": 0.9974676211948419, "No": 0.0025323817903223616}, "ground_truth": 0}, {"key": "15921828", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0009132030531145479, "res": {"No": 0.9990865698315521, "Yes": 0.0009132030531145479}, "ground_truth": 0}, {"key": "15921828", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992379489085306, "res": {"Yes": 0.9992379489085306, "No": 0.0007619878098330604}, "ground_truth": 0}, {"key": "15921828", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999836640924547, "res": {"Yes": 0.999836640924547, "No": 0.00016326465884421}, "ground_truth": 1}, {"key": "15921828", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999557020111849, "res": {"Yes": 0.9999557020111849, "No": 4.4130606115715174e-05}, "ground_truth": 0}, {"key": "15921828", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999791843696483, "res": {"Yes": 0.9999791843696483, "No": 2.0698289374930055e-05}, "ground_truth": 0}, {"key": "39109408", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6497641942430236, "res": {"Yes": 0.6497641942430236, "No": 0.35023546274289397}, "ground_truth": 0}, {"key": "39109408", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999888238005837, "res": {"Yes": 0.999888238005837, "No": 0.00011165732221115221}, "ground_truth": 0}, {"key": "39109408", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999764427474764, "res": {"Yes": 0.9999764427474764, "No": 2.3484615752971083e-05}, "ground_truth": 1}, {"key": "39109408", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996193275741411, "res": {"Yes": 0.9996193275741411, "No": 0.00038059509147235943}, "ground_truth": 0}, {"key": "39109408", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.738340317846507e-07}, "ground_truth": 0}, {"key": "20936833", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9883282900970732, "res": {"Yes": 0.9883282900970732, "No": 0.011671225795791646}, "ground_truth": 0}, {"key": "20936833", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.3837470752021138e-07}, "ground_truth": 0}, {"key": "20936833", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999715555225518, "res": {"Yes": 0.9999715555225518, "No": 2.836405169407598e-05}, "ground_truth": 1}, {"key": "20936833", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99995629800496, "res": {"Yes": 0.99995629800496, "No": 4.3540370185527475e-05}, "ground_truth": 0}, {"key": "20936833", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.978701369740899, "res": {"Yes": 0.978701369740899, "No": 0.021298477237916558}, "ground_truth": 0}, {"key": "36832879", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.307327785848369e-07, "res": {"No": 0.9999987335551019, "Yes": 7.307327785848369e-07}, "ground_truth": 0}, {"key": "36832879", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.004788969626446471, "res": {"No": 0.9952108052408531, "Yes": 0.004788969626446471}, "ground_truth": 0}, {"key": "36832879", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999171061059867, "res": {"Yes": 0.999171061059867, "No": 0.0008288685333706167}, "ground_truth": 1}, {"key": "36832879", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9720241443149558, "res": {"Yes": 0.9720241443149558, "No": 0.027975330800292356}, "ground_truth": 0}, {"key": "36832879", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985686434693218, "res": {"Yes": 0.9985686434693218, "No": 0.0014313092416106097}, "ground_truth": 0}, {"key": "14958201", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.004896919230830895, "res": {"No": 0.9951025504057799, "Yes": 0.004896919230830895}, "ground_truth": 0}, {"key": "14958201", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.41657465072594113, "res": {"No": 0.5834243992016069, "Yes": 0.41657465072594113}, "ground_truth": 0}, {"key": "14958201", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956501053173266, "res": {"Yes": 0.9956501053173266, "No": 0.004349819191142505}, "ground_truth": 1}, {"key": "14958201", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977104649927153, "res": {"Yes": 0.9977104649927153, "No": 0.0022895444759547705}, "ground_truth": 0}, {"key": "14958201", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9913053767695603, "res": {"Yes": 0.9913053767695603, "No": 0.00869416646053023}, "ground_truth": 0}, {"key": "34352262", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9917091214903488, "res": {"Yes": 0.9917091214903488, "No": 0.008290740411594126}, "ground_truth": 0}, {"key": "34352262", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999242341303785, "res": {"Yes": 0.9999242341303785, "No": 7.567558293104515e-05}, "ground_truth": 0}, {"key": "34352262", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999442590335854, "res": {"Yes": 0.9999442590335854, "No": 5.56534771341323e-05}, "ground_truth": 1}, {"key": "34352262", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998657186165522, "res": {"Yes": 0.9998657186165522, "No": 0.0001341983001566277}, "ground_truth": 0}, {"key": "34352262", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998946743031407, "res": {"Yes": 0.9998946743031407, "No": 0.00010521286229214273}, "ground_truth": 0}, {"key": "39805395", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9893723597048231, "res": {"Yes": 0.9893723597048231, "No": 0.010627329937250398}, "ground_truth": 0}, {"key": "39805395", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998652418606938, "res": {"Yes": 0.9998652418606938, "No": 0.00013465360041498754}, "ground_truth": 0}, {"key": "39805395", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993729440817869, "res": {"Yes": 0.9993729440817869, "No": 0.0006270390741842456}, "ground_truth": 1}, {"key": "39805395", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999303130782463, "res": {"Yes": 0.9999303130782463, "No": 6.96277752880947e-05}, "ground_truth": 0}, {"key": "39805395", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998628581148082, "res": {"Yes": 0.9998628581148082, "No": 0.0001370374927320267}, "ground_truth": 0}, {"key": "34303109", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997518037057976, "res": {"Yes": 0.9997518037057976, "No": 0.0002481031427687282}, "ground_truth": 0}, {"key": "34303109", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.766122604897437e-06}, "ground_truth": 0}, {"key": "34303109", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 4.640298722209312e-08}, "ground_truth": 1}, {"key": "34303109", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.71305025779723e-06}, "ground_truth": 0}, {"key": "34303109", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 2.8848235703127158e-08}, "ground_truth": 0}, {"key": "39939090", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9965212217191535, "res": {"Yes": 0.9965212217191535, "No": 0.0034787532878522718}, "ground_truth": 0}, {"key": "39939090", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953305439664477, "res": {"Yes": 0.9953305439664477, "No": 0.0046694854284487506}, "ground_truth": 0}, {"key": "39939090", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993219875543854, "res": {"Yes": 0.9993219875543854, "No": 0.0006779486570203153}, "ground_truth": 1}, {"key": "39939090", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997381024511521, "res": {"Yes": 0.9997381024511521, "No": 0.0002617725659725894}, "ground_truth": 0}, {"key": "39939090", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8826412732356738, "res": {"Yes": 0.8826412732356738, "No": 0.11735856177931273}, "ground_truth": 0}, {"key": "29347771", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999541524340317, "res": {"Yes": 0.9999541524340317, "No": 4.5791640744405195e-05}, "ground_truth": 0}, {"key": "29347771", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "No": 4.918416150095254e-06}, "ground_truth": 0}, {"key": "29347771", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999868132749457, "res": {"Yes": 0.9999868132749457, "No": 1.3138180826028471e-05}, "ground_truth": 1}, {"key": "29347771", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988957578245825, "res": {"Yes": 0.9988957578245825, "No": 0.0011041703947127317}, "ground_truth": 0}, {"key": "29347771", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974334695853667, "res": {"Yes": 0.9974334695853667, "No": 0.002566538949406938}, "ground_truth": 0}, {"key": "36783415", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9913207262608497, "res": {"Yes": 0.9913207262608497, "No": 0.00867882958360666}, "ground_truth": 0}, {"key": "36783415", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9878726275785553, "res": {"Yes": 0.9878726275785553, "No": 0.012126671451368495}, "ground_truth": 0}, {"key": "36783415", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9840888817769763, "res": {"Yes": 0.9840888817769763, "No": 0.01591069109183139}, "ground_truth": 1}, {"key": "36783415", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9888189913507651, "res": {"Yes": 0.9888189913507651, "No": 0.011180143230750434}, "ground_truth": 0}, {"key": "36783415", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9431368801111834, "res": {"Yes": 0.9431368801111834, "No": 0.05686253446357645}, "ground_truth": 0}, {"key": "37935687", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.01167584034335395, "res": {"No": 0.9883232822503147, "Yes": 0.01167584034335395}, "ground_truth": 0}, {"key": "37935687", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9437283963176115, "res": {"Yes": 0.9437283963176115, "No": 0.056270942279253604}, "ground_truth": 0}, {"key": "37935687", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9907835393490928, "res": {"Yes": 0.9907835393490928, "No": 0.00921631496244549}, "ground_truth": 1}, {"key": "37935687", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9041307446591506, "res": {"Yes": 0.9041307446591506, "No": 0.09586896191502439}, "ground_truth": 0}, {"key": "37935687", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.008809299095772648, "res": {"No": 0.991190590253198, "Yes": 0.008809299095772648}, "ground_truth": 0}, {"key": "40260829", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4324303526123283, "res": {"No": 0.5675695965802979, "Yes": 0.4324303526123283}, "ground_truth": 0}, {"key": "40260829", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999936078174301, "res": {"Yes": 0.9999936078174301, "No": 6.314809303531211e-06}, "ground_truth": 0}, {"key": "40260829", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999951574563252, "res": {"Yes": 0.9999951574563252, "No": 4.733967084008763e-06}, "ground_truth": 1}, {"key": "40260829", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996402886616422, "res": {"Yes": 0.9996402886616422, "No": 0.00035964036929569726}, "ground_truth": 0}, {"key": "40260829", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999645227323332, "res": {"Yes": 0.9999645227323332, "No": 3.534694821146587e-05}, "ground_truth": 0}, {"key": "36478199", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7833513427747887, "res": {"Yes": 0.7833513427747887, "No": 0.21664851179211217}, "ground_truth": 0}, {"key": "36478199", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.998204217749915, "res": {"Yes": 0.998204217749915, "No": 0.0017956911309095207}, "ground_truth": 0}, {"key": "36478199", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999321010092408, "res": {"Yes": 0.9999321010092408, "No": 6.778755055573938e-05}, "ground_truth": 1}, {"key": "36478199", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997415548027153, "res": {"Yes": 0.9997415548027153, "No": 0.00025836971369184905}, "ground_truth": 0}, {"key": "36478199", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979142510037555, "res": {"Yes": 0.9979142510037555, "No": 0.0020857557904298326}, "ground_truth": 0}, {"key": "34541803", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.77690500094141, "res": {"Yes": 0.77690500094141, "No": 0.22309363673073682}, "ground_truth": 0}, {"key": "34541803", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.5063179233404176, "res": {"Yes": 0.5063179233404176, "No": 0.49367298731026454}, "ground_truth": 0}, {"key": "34541803", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9857950904431909, "res": {"Yes": 0.9857950904431909, "No": 0.014204648624863264}, "ground_truth": 1}, {"key": "34541803", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977363337654683, "res": {"Yes": 0.9977363337654683, "No": 0.002263533027169228}, "ground_truth": 0}, {"key": "34541803", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0007049465314098483, "res": {"No": 0.9992948462378533, "Yes": 0.0007049465314098483}, "ground_truth": 0}, {"key": "35360841", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9536812050444791, "res": {"Yes": 0.9536812050444791, "No": 0.04631832999573543}, "ground_truth": 0}, {"key": "35360841", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997198728829297, "res": {"Yes": 0.9997198728829297, "No": 0.00028006131405486486}, "ground_truth": 0}, {"key": "35360841", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999642843338196, "res": {"Yes": 0.9999642843338196, "No": 3.5682524458309966e-05}, "ground_truth": 1}, {"key": "35360841", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999940846288958, "res": {"Yes": 0.9999940846288958, "No": 5.8623207520652136e-06}, "ground_truth": 0}, {"key": "35360841", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999039712510555, "res": {"Yes": 0.9999039712510555, "No": 9.599245148044516e-05}, "ground_truth": 0}, {"key": "35550407", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.000151552569470265, "res": {"No": 0.9998481981830619, "Yes": 0.000151552569470265}, "ground_truth": 0}, {"key": "35550407", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998480789911653, "res": {"Yes": 0.9998480789911653, "No": 0.00015180951709614106}, "ground_truth": 0}, {"key": "35550407", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9787769665754708, "res": {"Yes": 0.9787769665754708, "No": 0.021222991080582652}, "ground_truth": 1}, {"key": "35550407", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996052706661307, "res": {"Yes": 0.9996052706661307, "No": 0.0003946516029965178}, "ground_truth": 0}, {"key": "35550407", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.004024025939530833, "res": {"No": 0.9959759038618312, "Yes": 0.004024025939530833}, "ground_truth": 0}, {"key": "37561590", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996217107394322, "res": {"Yes": 0.9996217107394322, "No": 0.00037826861236104926}, "ground_truth": 0}, {"key": "37561590", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999762043451211, "res": {"Yes": 0.9999762043451211, "No": 2.373879329757186e-05}, "ground_truth": 0}, {"key": "37561590", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999980257181892, "res": {"Yes": 0.999980257181892, "No": 1.969156867250127e-05}, "ground_truth": 1}, {"key": "37561590", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999783499623655, "res": {"Yes": 0.9999783499623655, "No": 2.1589402255391647e-05}, "ground_truth": 0}, {"key": "37561590", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996050323502665, "res": {"Yes": 0.9996050323502665, "No": 0.0003949438322537613}, "ground_truth": 0}, {"key": "39328843", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.998549508680883, "res": {"Yes": 0.998549508680883, "No": 0.0014503979673001009}, "ground_truth": 0}, {"key": "39328843", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9921429889480585, "res": {"Yes": 0.9921429889480585, "No": 0.00785696798112811}, "ground_truth": 1}, {"key": "39328843", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9945282371280905, "res": {"Yes": 0.9945282371280905, "No": 0.005471743361896231}, "ground_truth": 0}, {"key": "39328843", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9953614878805959, "res": {"Yes": 0.9953614878805959, "No": 0.004638561418186504}, "ground_truth": 0}, {"key": "35389665", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6972488303463532, "res": {"Yes": 0.6972488303463532, "No": 0.30275081638129087}, "ground_truth": 0}, {"key": "35389665", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 1.742438318887791e-05, "res": {"No": 0.9999825220097418, "Yes": 1.742438318887791e-05}, "ground_truth": 0}, {"key": "35389665", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982947310261947, "res": {"Yes": 0.9982947310261947, "No": 0.0017052038760488586}, "ground_truth": 1}, {"key": "35389665", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9126449014706888, "res": {"Yes": 0.9126449014706888, "No": 0.0873549653249401}, "ground_truth": 0}, {"key": "35389665", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6107844038106027, "res": {"Yes": 0.6107844038106027, "No": 0.38921540432331503}, "ground_truth": 0}, {"key": "33080187", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 5.25689456210713e-05, "res": {"No": 0.9999472389619125, "Yes": 5.25689456210713e-05}, "ground_truth": 0}, {"key": "33080187", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.5549481128314421, "res": {"Yes": 0.5549481128314421, "No": 0.445051755092492}, "ground_truth": 0}, {"key": "33080187", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995417589428525, "res": {"Yes": 0.995417589428525, "No": 0.004582417416076679}, "ground_truth": 1}, {"key": "33080187", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.957152115303523, "res": {"Yes": 0.957152115303523, "No": 0.04284779902078653}, "ground_truth": 0}, {"key": "33080187", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997258277620793, "res": {"Yes": 0.9997258277620793, "No": 0.00027407575820313453}, "ground_truth": 0}, {"key": "38636995", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5880438494320335, "res": {"Yes": 0.5880438494320335, "No": 0.41195617773937193}, "ground_truth": 0}, {"key": "38636995", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995812122960609, "res": {"Yes": 0.9995812122960609, "No": 0.0004186848788970845}, "ground_truth": 0}, {"key": "38636995", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999688139202959, "res": {"Yes": 0.9999688139202959, "No": 3.107182804353357e-05}, "ground_truth": 1}, {"key": "38636995", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 6.2125880102626395e-06}, "ground_truth": 0}, {"key": "38636995", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.503657237696767e-06}, "ground_truth": 0}, {"key": "18536236", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9837769347064275, "res": {"Yes": 0.9837769347064275, "No": 0.01622306960580484}, "ground_truth": 0}, {"key": "18536236", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991766590313352, "res": {"Yes": 0.9991766590313352, "No": 0.0008233175394900178}, "ground_truth": 0}, {"key": "18536236", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999429478675348, "res": {"Yes": 0.9999429478675348, "No": 5.696314945448435e-05}, "ground_truth": 1}, {"key": "18536236", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4767252260923417, "res": {"No": 0.5232747438510514, "Yes": 0.4767252260923417}, "ground_truth": 0}, {"key": "18536236", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999738203326934, "res": {"Yes": 0.9999738203326934, "No": 2.6081168955420458e-05}, "ground_truth": 0}, {"key": "36289151", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999751315392253, "res": {"Yes": 0.9999751315392253, "No": 2.4733325825754935e-05}, "ground_truth": 0}, {"key": "36289151", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979891644441544, "res": {"Yes": 0.9979891644441544, "No": 0.0020107530004033654}, "ground_truth": 0}, {"key": "36289151", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.321653292513615e-06}, "ground_truth": 1}, {"key": "36289151", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998340188763911, "res": {"Yes": 0.9998340188763911, "No": 0.0001658529509241878}, "ground_truth": 0}, {"key": "23017045", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9960331414529199, "res": {"Yes": 0.9960331414529199, "No": 0.003966841868415711}, "ground_truth": 0}, {"key": "23017045", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999583243784529, "res": {"Yes": 0.9999583243784529, "No": 4.1595731671986396e-05}, "ground_truth": 0}, {"key": "23017045", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0001230904631691017, "res": {"No": 0.9998768031293578, "Yes": 0.0001230904631691017}, "ground_truth": 1}, {"key": "23017045", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982271464646673, "res": {"Yes": 0.9982271464646673, "No": 0.001772815177734941}, "ground_truth": 0}, {"key": "36418082", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9794520336857905, "res": {"Yes": 0.9794520336857905, "No": 0.020547736296409113}, "ground_truth": 0}, {"key": "36418082", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.3814036941880437, "res": {"No": 0.6185953844763192, "Yes": 0.3814036941880437}, "ground_truth": 0}, {"key": "36418082", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8747985829821955, "res": {"Yes": 0.8747985829821955, "No": 0.12520105422801905}, "ground_truth": 1}, {"key": "36418082", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997136112073614, "res": {"Yes": 0.997136112073614, "No": 0.002863811067819032}, "ground_truth": 0}, {"key": "36418082", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988762485820546, "res": {"Yes": 0.9988762485820546, "No": 0.001123652206740955}, "ground_truth": 0}, {"key": "34396551", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00988468785995257, "res": {"No": 0.9901150811731736, "Yes": 0.00988468785995257}, "ground_truth": 0}, {"key": "34396551", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.3570838681511032, "res": {"No": 0.642915884863037, "Yes": 0.3570838681511032}, "ground_truth": 0}, {"key": "34396551", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9947252988488684, "res": {"Yes": 0.9947252988488684, "No": 0.005274584344372664}, "ground_truth": 1}, {"key": "34396551", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9919220781842342, "res": {"Yes": 0.9919220781842342, "No": 0.008077840556724484}, "ground_truth": 0}, {"key": "34396551", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985432072542364, "res": {"Yes": 0.9985432072542364, "No": 0.0014567601272753858}, "ground_truth": 0}, {"key": "39720944", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9032932928655331, "res": {"Yes": 0.9032932928655331, "No": 0.09670643816407622}, "ground_truth": 0}, {"key": "39720944", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8719630585663094, "res": {"Yes": 0.8719630585663094, "No": 0.1280366791823207}, "ground_truth": 0}, {"key": "39720944", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998241266575298, "res": {"Yes": 0.9998241266575298, "No": 0.0001758166266691822}, "ground_truth": 1}, {"key": "39720944", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992170036517487, "res": {"Yes": 0.9992170036517487, "No": 0.0007829270763212641}, "ground_truth": 0}, {"key": "39720944", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996442209843693, "res": {"Yes": 0.9996442209843693, "No": 0.0003557394418504051}, "ground_truth": 0}, {"key": "35884842", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.043563240153262246, "res": {"No": 0.9564364619128376, "Yes": 0.043563240153262246}, "ground_truth": 0}, {"key": "35884842", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.05751805387647195, "res": {"No": 0.942481397740603, "Yes": 0.05751805387647195}, "ground_truth": 0}, {"key": "35884842", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5132926954931448, "res": {"Yes": 0.5132926954931448, "No": 0.4867067224162913}, "ground_truth": 1}, {"key": "35884842", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.01383563733423736, "res": {"No": 0.9861640706193683, "Yes": 0.01383563733423736}, "ground_truth": 0}, {"key": "35884842", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.48405465746246873, "res": {"No": 0.5159442149019955, "Yes": 0.48405465746246873}, "ground_truth": 0}, {"key": "35403375", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5072013318374947, "res": {"Yes": 0.5072013318374947, "No": 0.4927980370646306}, "ground_truth": 0}, {"key": "35403375", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9854284655350773, "res": {"Yes": 0.9854284655350773, "No": 0.014571339900958978}, "ground_truth": 0}, {"key": "35403375", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2931868968826967, "res": {"No": 0.7068125942538711, "Yes": 0.2931868968826967}, "ground_truth": 1}, {"key": "35403375", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9851404304374577, "res": {"Yes": 0.9851404304374577, "No": 0.014859379454788074}, "ground_truth": 0}, {"key": "35403375", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991546357718442, "res": {"Yes": 0.9991546357718442, "No": 0.0008452604950072047}, "ground_truth": 0}, {"key": "26341324", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.005262747764193587, "res": {"No": 0.9947370908898178, "Yes": 0.005262747764193587}, "ground_truth": 0}, {"key": "26341324", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993785394866276, "res": {"Yes": 0.9993785394866276, "No": 0.0006214227194240305}, "ground_truth": 0}, {"key": "26341324", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961551934687627, "res": {"Yes": 0.9961551934687627, "No": 0.0038445323842515363}, "ground_truth": 1}, {"key": "26341324", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996238556100456, "res": {"Yes": 0.9996238556100456, "No": 0.0003760606967785071}, "ground_truth": 0}, {"key": "26341324", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8666317958567922, "res": {"Yes": 0.8666317958567922, "No": 0.13336800724673817}, "ground_truth": 0}, {"key": "19212345", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.11290386212967136, "res": {"No": 0.8870960856404952, "Yes": 0.11290386212967136}, "ground_truth": 0}, {"key": "19212345", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.5134787670155437, "res": {"Yes": 0.5134787670155437, "No": 0.48652112300228073}, "ground_truth": 0}, {"key": "19212345", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996011075087727, "res": {"Yes": 0.9996011075087727, "No": 0.00039882999881502387}, "ground_truth": 1}, {"key": "19212345", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994929672845535, "res": {"Yes": 0.9994929672845535, "No": 0.0005070085452391692}, "ground_truth": 0}, {"key": "19212345", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957552878651372, "res": {"Yes": 0.9957552878651372, "No": 0.004244723486390173}, "ground_truth": 0}, {"key": "30548367", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9962426251648424, "res": {"Yes": 0.9962426251648424, "No": 0.0037572799534802484}, "ground_truth": 0}, {"key": "30548367", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997124843405515, "res": {"Yes": 0.9997124843405515, "No": 0.0002874891320518925}, "ground_truth": 0}, {"key": "30548367", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9932039857673958, "res": {"Yes": 0.9932039857673958, "No": 0.006795882261491884}, "ground_truth": 1}, {"key": "30548367", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999879060383785, "res": {"Yes": 0.999879060383785, "No": 0.00012081585875448408}, "ground_truth": 0}, {"key": "30548367", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.996991764834249, "res": {"Yes": 0.996991764834249, "No": 0.0030081274603671994}, "ground_truth": 0}, {"key": "37919402", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.04057501889097303, "res": {"No": 0.959424826632675, "Yes": 0.04057501889097303}, "ground_truth": 0}, {"key": "37919402", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993897340880357, "res": {"Yes": 0.9993897340880357, "No": 0.0006102337362555334}, "ground_truth": 0}, {"key": "37919402", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.153827543490192e-07}, "ground_truth": 1}, {"key": "37919402", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999900317366834, "res": {"Yes": 0.9999900317366834, "No": 9.912489800075531e-06}, "ground_truth": 0}, {"key": "37919402", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999106460363032, "res": {"Yes": 0.9999106460363032, "No": 8.927152409945622e-05}, "ground_truth": 0}, {"key": "39995133", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.41007500376595973, "res": {"No": 0.5899248636133079, "Yes": 0.41007500376595973}, "ground_truth": 0}, {"key": "39995133", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9886310159423631, "res": {"Yes": 0.9886310159423631, "No": 0.011368866140308064}, "ground_truth": 0}, {"key": "39995133", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9576074048988966, "res": {"Yes": 0.9576074048988966, "No": 0.04239252200947069}, "ground_truth": 1}, {"key": "39995133", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966840253281266, "res": {"Yes": 0.9966840253281266, "No": 0.00331597858437649}, "ground_truth": 0}, {"key": "39995133", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9204440282222132, "res": {"Yes": 0.9204440282222132, "No": 0.07955571382017085}, "ground_truth": 0}, {"key": "40249088", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9977392998404501, "res": {"Yes": 0.9977392998404501, "No": 0.0022606669468690833}, "ground_truth": 0}, {"key": "40249088", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966301369767532, "res": {"Yes": 0.9966301369767532, "No": 0.003369802977421767}, "ground_truth": 0}, {"key": "40249088", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998765647587563, "res": {"Yes": 0.9998765647587563, "No": 0.0001233419473567431}, "ground_truth": 1}, {"key": "40249088", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999907469518097, "res": {"Yes": 0.9999907469518097, "No": 9.162412871924921e-06}, "ground_truth": 0}, {"key": "40249088", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9946874372409154, "res": {"Yes": 0.9946874372409154, "No": 0.005312507143891437}, "ground_truth": 0}, {"key": "40254388", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9826199786695861, "res": {"Yes": 0.9826199786695861, "No": 0.017379696880581552}, "ground_truth": 0}, {"key": "40254388", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.993725204272863, "res": {"Yes": 0.993725204272863, "No": 0.006274779086014693}, "ground_truth": 0}, {"key": "40254388", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.656492790188767e-07}, "ground_truth": 1}, {"key": "40254388", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.580057410862718e-07}, "ground_truth": 0}, {"key": "40254388", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989166891053695, "res": {"Yes": 0.9989166891053695, "No": 0.001083286828249192}, "ground_truth": 0}, {"key": "31995230", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8729759082758624, "res": {"Yes": 0.8729759082758624, "No": 0.1270239662115417}, "ground_truth": 0}, {"key": "31995230", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999640459343629, "res": {"Yes": 0.9999640459343629, "No": 3.583759772020785e-05}, "ground_truth": 0}, {"key": "31995230", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1955968992592134e-06}, "ground_truth": 1}, {"key": "31995230", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99996356913662, "res": {"Yes": 0.99996356913662, "No": 3.633743793848384e-05}, "ground_truth": 0}, {"key": "31995230", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999797803764193, "res": {"Yes": 0.9999797803764193, "No": 2.007641128459358e-05}, "ground_truth": 0}, {"key": "38632129", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.20364037672375956, "res": {"No": 0.796358060125579, "Yes": 0.20364037672375956}, "ground_truth": 0}, {"key": "38632129", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9463942007794853, "res": {"Yes": 0.9463942007794853, "No": 0.05360537157366905}, "ground_truth": 0}, {"key": "38632129", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.994298828015412, "res": {"Yes": 0.994298828015412, "No": 0.005701045499478845}, "ground_truth": 1}, {"key": "38632129", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9677330959981039, "res": {"Yes": 0.9677330959981039, "No": 0.032266565780822674}, "ground_truth": 0}, {"key": "38632129", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.15824322988665065, "res": {"No": 0.8417554553497252, "Yes": 0.15824322988665065}, "ground_truth": 0}, {"key": "35720795", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9720616486460261, "res": {"Yes": 0.9720616486460261, "No": 0.02793822936289132}, "ground_truth": 0}, {"key": "35720795", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956977318857029, "res": {"Yes": 0.9956977318857029, "No": 0.004302151640156148}, "ground_truth": 0}, {"key": "35720795", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944222378026848, "res": {"Yes": 0.9944222378026848, "No": 0.005577196768813353}, "ground_truth": 1}, {"key": "35720795", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9390948529681256, "res": {"Yes": 0.9390948529681256, "No": 0.060904872454722786}, "ground_truth": 0}, {"key": "35720795", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991517808913819, "res": {"Yes": 0.9991517808913819, "No": 0.0008481975410071181}, "ground_truth": 0}, {"key": "23906759", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0003843592099700512, "res": {"No": 0.9996152798837702, "Yes": 0.0003843592099700512}, "ground_truth": 0}, {"key": "23906759", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.054732674929880466, "res": {"No": 0.9452671812196817, "Yes": 0.054732674929880466}, "ground_truth": 0}, {"key": "23906759", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.014049189102394897, "res": {"No": 0.9859505714611654, "Yes": 0.014049189102394897}, "ground_truth": 1}, {"key": "23906759", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.24680263755357684, "res": {"No": 0.7531970717140843, "Yes": 0.24680263755357684}, "ground_truth": 0}, {"key": "23906759", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.006234278027868651, "res": {"No": 0.9937649994910841, "Yes": 0.006234278027868651}, "ground_truth": 0}, {"key": "19410108", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00791767719208157, "res": {"No": 0.9920818619252094, "Yes": 0.00791767719208157}, "ground_truth": 0}, {"key": "19410108", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999942038320978, "res": {"Yes": 0.9999942038320978, "No": 5.6729459911404635e-06}, "ground_truth": 0}, {"key": "19410108", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999759659438225, "res": {"Yes": 0.9999759659438225, "No": 2.3891949422940516e-05}, "ground_truth": 1}, {"key": "19410108", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999361536682638, "res": {"Yes": 0.9999361536682638, "No": 6.374224680927049e-05}, "ground_truth": 0}, {"key": "19410108", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999527220576372, "res": {"Yes": 0.9999527220576372, "No": 4.711731511391378e-05}, "ground_truth": 0}, {"key": "30745137", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.1011665105818656, "res": {"No": 0.8988333898456519, "Yes": 0.1011665105818656}, "ground_truth": 0}, {"key": "30745137", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.026144406811767065, "res": {"No": 0.9738541372218085, "Yes": 0.026144406811767065}, "ground_truth": 0}, {"key": "30745137", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1406153121182702, "res": {"No": 0.8593843398475666, "Yes": 0.1406153121182702}, "ground_truth": 1}, {"key": "30745137", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9884216707767806, "res": {"Yes": 0.9884216707767806, "No": 0.011578179846237406}, "ground_truth": 0}, {"key": "30745137", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994952309887639, "res": {"Yes": 0.9994952309887639, "No": 0.0005047652945460446}, "ground_truth": 0}, {"key": "26553115", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.961574542428937e-06, "res": {"No": 0.9999959918780326, "Yes": 3.961574542428937e-06}, "ground_truth": 0}, {"key": "26553115", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999710577640734, "res": {"Yes": 0.999710577640734, "No": 0.00028938111546231155}, "ground_truth": 0}, {"key": "26553115", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998520121212866, "res": {"Yes": 0.9998520121212866, "No": 0.00014793582397970643}, "ground_truth": 1}, {"key": "26553115", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999168440936441, "res": {"Yes": 0.9999168440936441, "No": 8.304050974648218e-05}, "ground_truth": 0}, {"key": "26553115", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999232805730858, "res": {"Yes": 0.9999232805730858, "No": 7.66152051701984e-05}, "ground_truth": 0}, {"key": "37872311", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.007384814365579251, "res": {"No": 0.9926149378776443, "Yes": 0.007384814365579251}, "ground_truth": 0}, {"key": "37872311", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936820129455216, "res": {"Yes": 0.9936820129455216, "No": 0.006317906978257776}, "ground_truth": 0}, {"key": "37872311", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998319927547982, "res": {"Yes": 0.9998319927547982, "No": 0.00016787817339140973}, "ground_truth": 1}, {"key": "37872311", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999978707566687, "res": {"Yes": 0.999978707566687, "No": 2.1242153029298385e-05}, "ground_truth": 0}, {"key": "37872311", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999764427474764, "res": {"Yes": 0.9999764427474764, "No": 2.3447914521777457e-05}, "ground_truth": 0}, {"key": "35553131", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 5.1248925449430025e-06}, "ground_truth": 0}, {"key": "35553131", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999815683978641, "res": {"Yes": 0.9999815683978641, "No": 1.8375891445524372e-05}, "ground_truth": 0}, {"key": "35553131", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998552301601648, "res": {"Yes": 0.9998552301601648, "No": 0.0001446758261695788}, "ground_truth": 1}, {"key": "35553131", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999907469518097, "res": {"Yes": 0.9999907469518097, "No": 9.109857399483921e-06}, "ground_truth": 0}, {"key": "35553131", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998256760361882, "res": {"Yes": 0.9998256760361882, "No": 0.00017419921497745981}, "ground_truth": 0}, {"key": "39038936", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9781699969892946, "res": {"Yes": 0.9781699969892946, "No": 0.021829908498006582}, "ground_truth": 0}, {"key": "39038936", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9831638982768612, "res": {"Yes": 0.9831638982768612, "No": 0.016836059398755494}, "ground_truth": 0}, {"key": "39038936", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999633307373339, "res": {"Yes": 0.9999633307373339, "No": 3.658837466926399e-05}, "ground_truth": 1}, {"key": "39038936", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.796015424265576e-06}, "ground_truth": 0}, {"key": "39038936", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.1675676731933003e-07}, "ground_truth": 0}, {"key": "38735486", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.021858015119788323, "res": {"No": 0.9781413722413421, "Yes": 0.021858015119788323}, "ground_truth": 0}, {"key": "38735486", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987660050986411, "res": {"Yes": 0.9987660050986411, "No": 0.0012339073960925364}, "ground_truth": 0}, {"key": "38735486", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.57260761463463e-07}, "ground_truth": 1}, {"key": "38735486", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.985597853671537e-07}, "ground_truth": 0}, {"key": "38735486", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 6.236060488014118e-07}, "ground_truth": 0}, {"key": "17087845", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.278749385791683e-06, "res": {"No": 0.9999984951481323, "Yes": 1.278749385791683e-06}, "ground_truth": 0}, {"key": "17087845", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.024166642639927703, "res": {"No": 0.9758333013048974, "Yes": 0.024166642639927703}, "ground_truth": 0}, {"key": "17087845", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8608417150125681, "res": {"Yes": 0.8608417150125681, "No": 0.1391580975146735}, "ground_truth": 1}, {"key": "17087845", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8077788164116737, "res": {"Yes": 0.8077788164116737, "No": 0.1922210629001516}, "ground_truth": 0}, {"key": "17087845", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6492432013747982, "res": {"Yes": 0.6492432013747982, "No": 0.3507565099802605}, "ground_truth": 0}, {"key": "37443011", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4428218076167697, "res": {"No": 0.5571779559750841, "Yes": 0.4428218076167697}, "ground_truth": 0}, {"key": "37443011", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999418750973176, "res": {"Yes": 0.9999418750973176, "No": 5.803147628815295e-05}, "ground_truth": 0}, {"key": "37443011", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9888974335172835, "res": {"Yes": 0.9888974335172835, "No": 0.011102483022047964}, "ground_truth": 1}, {"key": "37443011", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995496857687195, "res": {"Yes": 0.995496857687195, "No": 0.004503067264797536}, "ground_truth": 0}, {"key": "37443011", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7062377442121404, "res": {"Yes": 0.7062377442121404, "No": 0.2937619892172664}, "ground_truth": 0}, {"key": "36855749", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999552252224203, "res": {"Yes": 0.9999552252224203, "No": 4.4718288558880845e-05}, "ground_truth": 0}, {"key": "36855749", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996883006083522, "res": {"Yes": 0.9996883006083522, "No": 0.00031159166293186327}, "ground_truth": 0}, {"key": "36855749", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996711444548425, "res": {"Yes": 0.9996711444548425, "No": 0.0003288170634471517}, "ground_truth": 1}, {"key": "36855749", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997258277620793, "res": {"Yes": 0.9997258277620793, "No": 0.0002740420489580366}, "ground_truth": 0}, {"key": "36855749", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975800649751345, "res": {"Yes": 0.9975800649751345, "No": 0.002419715034708026}, "ground_truth": 0}, {"key": "35613141", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9985973016023321, "res": {"Yes": 0.9985973016023321, "No": 0.001402696252325358}, "ground_truth": 0}, {"key": "35613141", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9507109584180833, "res": {"Yes": 0.9507109584180833, "No": 0.04928891850373948}, "ground_truth": 0}, {"key": "35613141", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975980999891205, "res": {"Yes": 0.9975980999891205, "No": 0.00240186202185217}, "ground_truth": 1}, {"key": "35613141", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9749995397847913, "res": {"Yes": 0.9749995397847913, "No": 0.025000336355111084}, "ground_truth": 0}, {"key": "35613141", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991414337191253, "res": {"Yes": 0.9991414337191253, "No": 0.0008584719083021753}, "ground_truth": 0}, {"key": "39088847", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8868295497679296, "res": {"Yes": 0.8868295497679296, "No": 0.11317044883359519}, "ground_truth": 0}, {"key": "39088847", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.178606730355162e-06}, "ground_truth": 0}, {"key": "39088847", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9862334043353793, "res": {"Yes": 0.9862334043353793, "No": 0.013766488632924076}, "ground_truth": 1}, {"key": "39088847", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999931310055916, "res": {"Yes": 0.9999931310055916, "No": 6.850739193858353e-06}, "ground_truth": 0}, {"key": "39088847", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991093025067124, "res": {"Yes": 0.9991093025067124, "No": 0.0008906304279370675}, "ground_truth": 0}, {"key": "33197277", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8194005111987183, "res": {"Yes": 0.8194005111987183, "No": 0.1805988510263198}, "ground_truth": 0}, {"key": "33197277", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972435111114413, "res": {"Yes": 0.9972435111114413, "No": 0.0027564159829799755}, "ground_truth": 0}, {"key": "33197277", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997865884754799, "res": {"Yes": 0.9997865884754799, "No": 0.00021336398519461024}, "ground_truth": 1}, {"key": "33197277", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9857486339726493, "res": {"Yes": 0.9857486339726493, "No": 0.014251193804814728}, "ground_truth": 0}, {"key": "33197277", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9588312156054566, "res": {"Yes": 0.9588312156054566, "No": 0.04116791451237985}, "ground_truth": 0}, {"key": "33815489", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9995345234678308, "res": {"Yes": 0.9995345234678308, "No": 0.0004653582587944897}, "ground_truth": 0}, {"key": "33815489", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985682863812387, "res": {"Yes": 0.9985682863812387, "No": 0.001431701760416528}, "ground_truth": 0}, {"key": "33815489", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999678603234905, "res": {"Yes": 0.9999678603234905, "No": 3.210537809376617e-05}, "ground_truth": 1}, {"key": "33815489", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999853828508316, "res": {"Yes": 0.9999853828508316, "No": 1.4545454771328255e-05}, "ground_truth": 0}, {"key": "33815489", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976258612360317, "res": {"Yes": 0.9976258612360317, "No": 0.0023740996692467073}, "ground_truth": 0}, {"key": "35862754", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6034332194463746, "res": {"Yes": 0.6034332194463746, "No": 0.3965662757186566}, "ground_truth": 0}, {"key": "35862754", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999385375988907, "res": {"Yes": 0.9999385375988907, "No": 6.137794434289073e-05}, "ground_truth": 0}, {"key": "35862754", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995397660001734, "res": {"Yes": 0.9995397660001734, "No": 0.00046010938650513746}, "ground_truth": 1}, {"key": "35862754", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991602336013298, "res": {"Yes": 0.9991602336013298, "No": 0.0008396541307718287}, "ground_truth": 0}, {"key": "35862754", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999765619466755, "res": {"Yes": 0.9999765619466755, "No": 2.3372884255962193e-05}, "ground_truth": 0}, {"key": "36080615", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.202575932756365e-05, "res": {"No": 0.9999678603234905, "Yes": 3.202575932756365e-05}, "ground_truth": 0}, {"key": "36080615", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990085309305745, "res": {"Yes": 0.9990085309305745, "No": 0.0009914635233903768}, "ground_truth": 0}, {"key": "36080615", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.264954455890725e-07}, "ground_truth": 1}, {"key": "36080615", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.466774749452944e-07}, "ground_truth": 0}, {"key": "36080615", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998858541951282, "res": {"Yes": 0.9998858541951282, "No": 0.0001140601063308742}, "ground_truth": 0}, {"key": "22822742", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.09825218394814186, "res": {"No": 0.9017473120662433, "Yes": 0.09825218394814186}, "ground_truth": 0}, {"key": "22822742", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.25698694321202153, "res": {"No": 0.743012418802218, "Yes": 0.25698694321202153}, "ground_truth": 0}, {"key": "22822742", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999760851449647, "res": {"Yes": 0.9999760851449647, "No": 2.384361717827283e-05}, "ground_truth": 1}, {"key": "22822742", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999858596579756, "res": {"Yes": 0.9999858596579756, "No": 1.4063988988194015e-05}, "ground_truth": 0}, {"key": "22822742", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.26101066772111703, "res": {"No": 0.738989076207816, "Yes": 0.26101066772111703}, "ground_truth": 0}, {"key": "39747536", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9863171287495682, "res": {"Yes": 0.9863171287495682, "No": 0.013682314397410245}, "ground_truth": 0}, {"key": "39747536", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.002724607221922373, "res": {"No": 0.9972751671146401, "Yes": 0.002724607221922373}, "ground_truth": 0}, {"key": "39747536", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.95570510604159, "res": {"Yes": 0.95570510604159, "No": 0.044294259177553176}, "ground_truth": 1}, {"key": "39747536", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990335025598099, "res": {"Yes": 0.9990335025598099, "No": 0.0009664905856746574}, "ground_truth": 0}, {"key": "39747536", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9540387003042373, "res": {"Yes": 0.9540387003042373, "No": 0.045961081267751606}, "ground_truth": 0}, {"key": "34218396", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.19911381163031291, "res": {"No": 0.8008858271206921, "Yes": 0.19911381163031291}, "ground_truth": 0}, {"key": "34218396", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9908422767198897, "res": {"Yes": 0.9908422767198897, "No": 0.009157631532967895}, "ground_truth": 0}, {"key": "34218396", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9388060412023838, "res": {"Yes": 0.9388060412023838, "No": 0.06119373279610736}, "ground_truth": 1}, {"key": "34218396", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.978975598192258, "res": {"Yes": 0.978975598192258, "No": 0.02102437710414098}, "ground_truth": 0}, {"key": "34218396", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989367945039012, "res": {"Yes": 0.9989367945039012, "No": 0.0010631865512060212}, "ground_truth": 0}, {"key": "39150388", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.07098204650809564, "res": {"No": 0.9290178603530329, "Yes": 0.07098204650809564}, "ground_truth": 0}, {"key": "39150388", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999915813694369, "res": {"Yes": 0.9999915813694369, "No": 8.288711741401002e-06}, "ground_truth": 0}, {"key": "39150388", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1880954442180992e-06}, "ground_truth": 1}, {"key": "39150388", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.8245211297844014e-06}, "ground_truth": 0}, {"key": "39150388", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.00021045293848076673, "res": {"No": 0.9997894487690223, "Yes": 0.00021045293848076673}, "ground_truth": 0}, {"key": "28765782", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999747739361825, "res": {"Yes": 0.9999747739361825, "No": 2.5132015667886153e-05}, "ground_truth": 0}, {"key": "28765782", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998566603941031, "res": {"Yes": 0.9998566603941031, "No": 0.0001433160824769463}, "ground_truth": 0}, {"key": "28765782", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.5131590764339537e-06}, "ground_truth": 1}, {"key": "28765782", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999675027220479, "res": {"Yes": 0.9999675027220479, "No": 3.237360913333298e-05}, "ground_truth": 0}, {"key": "28765782", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999689331225854, "res": {"Yes": 0.9999689331225854, "No": 3.100280108439494e-05}, "ground_truth": 0}, {"key": "35828022", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9927670644973983, "res": {"Yes": 0.9927670644973983, "No": 0.0072327062818985715}, "ground_truth": 0}, {"key": "35828022", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993212764870781, "res": {"Yes": 0.9993212764870781, "No": 0.0006786568050071741}, "ground_truth": 0}, {"key": "35828022", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998972965103645, "res": {"Yes": 0.9998972965103645, "No": 0.00010256354502632483}, "ground_truth": 1}, {"key": "35828022", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995459544199067, "res": {"Yes": 0.9995459544199067, "No": 0.00045395612752496215}, "ground_truth": 0}, {"key": "35828022", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9398839798755219, "res": {"Yes": 0.9398839798755219, "No": 0.060115720455856905}, "ground_truth": 0}, {"key": "27717735", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996130158676828, "res": {"Yes": 0.9996130158676828, "No": 0.0003869371427348986}, "ground_truth": 0}, {"key": "27717735", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998306817359556, "res": {"Yes": 0.9998306817359556, "No": 0.00016926551182538122}, "ground_truth": 0}, {"key": "27717735", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999310282486606, "res": {"Yes": 0.9999310282486606, "No": 6.894648044916701e-05}, "ground_truth": 1}, {"key": "27717735", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999895549275502, "res": {"Yes": 0.9999895549275502, "No": 1.0390839056021728e-05}, "ground_truth": 0}, {"key": "27717735", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992552093943151, "res": {"Yes": 0.9992552093943151, "No": 0.0007446841419667902}, "ground_truth": 0}, {"key": "37977826", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 5.589923254283548e-06, "res": {"No": 0.9999943230348141, "Yes": 5.589923254283548e-06}, "ground_truth": 0}, {"key": "37977826", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9895725369067397, "res": {"Yes": 0.9895725369067397, "No": 0.010427346006703166}, "ground_truth": 0}, {"key": "37977826", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998721547728976, "res": {"Yes": 0.9998721547728976, "No": 0.00012772002428183412}, "ground_truth": 1}, {"key": "37977826", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997321438498659, "res": {"Yes": 0.9997321438498659, "No": 0.0002677692381635102}, "ground_truth": 0}, {"key": "37977826", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999534372470786, "res": {"Yes": 0.9999534372470786, "No": 4.6540577103772605e-05}, "ground_truth": 0}, {"key": "31768588", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9961143844164159, "res": {"Yes": 0.9961143844164159, "No": 0.0038855528560398523}, "ground_truth": 0}, {"key": "31768588", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.818764564243056e-06}, "ground_truth": 0}, {"key": "31768588", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9918315347100205, "res": {"Yes": 0.9918315347100205, "No": 0.00816841574642257}, "ground_truth": 1}, {"key": "31768588", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969331175217012, "res": {"Yes": 0.9969331175217012, "No": 0.0030667730315366527}, "ground_truth": 0}, {"key": "31768588", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9793175719876506, "res": {"Yes": 0.9793175719876506, "No": 0.020682209348800207}, "ground_truth": 0}, {"key": "37183351", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.937115948502258, "res": {"Yes": 0.937115948502258, "No": 0.0628837622771004}, "ground_truth": 0}, {"key": "37183351", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1916448241964898e-06}, "ground_truth": 0}, {"key": "37183351", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999770387506135, "res": {"Yes": 0.9999770387506135, "No": 2.2855357596254346e-05}, "ground_truth": 1}, {"key": "37183351", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999926541946805, "res": {"Yes": 0.9999926541946805, "No": 7.292705178865434e-06}, "ground_truth": 0}, {"key": "37183351", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.3329568245116468e-07}, "ground_truth": 0}, {"key": "39622090", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.979579680274028, "res": {"Yes": 0.979579680274028, "No": 0.020420221889941064}, "ground_truth": 0}, {"key": "39622090", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 1.8251616491055208e-06, "res": {"No": 0.9999980183344636, "Yes": 1.8251616491055208e-06}, "ground_truth": 0}, {"key": "39622090", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964642866074135, "res": {"Yes": 0.9964642866074135, "No": 0.0035356256783693096}, "ground_truth": 1}, {"key": "39622090", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9585124054631143, "res": {"Yes": 0.9585124054631143, "No": 0.04148744199786218}, "ground_truth": 0}, {"key": "39622090", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.13406527108171842, "res": {"No": 0.8659344466446739, "Yes": 0.13406527108171842}, "ground_truth": 0}, {"key": "39272756", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5388200461283663, "res": {"Yes": 0.5388200461283663, "No": 0.461179703305458}, "ground_truth": 0}, {"key": "39272756", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966327435666221, "res": {"Yes": 0.9966327435666221, "No": 0.003367175555926972}, "ground_truth": 0}, {"key": "39272756", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982054077098722, "res": {"Yes": 0.9982054077098722, "No": 0.0017945219780519171}, "ground_truth": 1}, {"key": "39272756", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9959902077720854, "res": {"Yes": 0.9959902077720854, "No": 0.004009710238500822}, "ground_truth": 0}, {"key": "39272756", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988593627218004, "res": {"Yes": 0.9988593627218004, "No": 0.0011405228287500002}, "ground_truth": 0}, {"key": "32138822", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00035912092060192324, "res": {"No": 0.9996407653002419, "Yes": 0.00035912092060192324}, "ground_truth": 0}, {"key": "32138822", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996195659034096, "res": {"Yes": 0.9996195659034096, "No": 0.00038034542689724117}, "ground_truth": 0}, {"key": "32138822", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.711060374304269e-07}, "ground_truth": 1}, {"key": "32138822", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999410407211666, "res": {"Yes": 0.9999410407211666, "No": 5.8899850824555456e-05}, "ground_truth": 0}, {"key": "32138822", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999613043536958, "res": {"Yes": 0.9999613043536958, "No": 3.863975071947922e-05}, "ground_truth": 0}, {"key": "31070114", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9994429540477378, "res": {"Yes": 0.9994429540477378, "No": 0.0005569522771911245}, "ground_truth": 0}, {"key": "31070114", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.8079047312325484e-06}, "ground_truth": 0}, {"key": "31070114", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.1447233248234696e-06}, "ground_truth": 1}, {"key": "31070114", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.3113912633927395e-07}, "ground_truth": 0}, {"key": "31070114", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.742867505212784e-07}, "ground_truth": 0}, {"key": "39652762", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9052270887169536, "res": {"Yes": 0.9052270887169536, "No": 0.09477253754811009}, "ground_truth": 0}, {"key": "39652762", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997400092034663, "res": {"Yes": 0.9997400092034663, "No": 0.00025987079312196295}, "ground_truth": 0}, {"key": "39652762", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979194775933323, "res": {"Yes": 0.9979194775933323, "No": 0.0020804740553723995}, "ground_truth": 1}, {"key": "39652762", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9901897644097797, "res": {"Yes": 0.9901897644097797, "No": 0.009809963756088684}, "ground_truth": 0}, {"key": "39652762", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.015239454855498543, "res": {"No": 0.984760186435807, "Yes": 0.015239454855498543}, "ground_truth": 0}, {"key": "33258866", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5177220830029282, "res": {"Yes": 0.5177220830029282, "No": 0.48227745392184496}, "ground_truth": 0}, {"key": "33258866", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.12073814129612391, "res": {"No": 0.8792614985796456, "Yes": 0.12073814129612391}, "ground_truth": 0}, {"key": "33258866", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993665147665507, "res": {"Yes": 0.9993665147665507, "No": 0.0006333912109137352}, "ground_truth": 1}, {"key": "33258866", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9890941457681908, "res": {"Yes": 0.9890941457681908, "No": 0.01090562028671836}, "ground_truth": 0}, {"key": "33258866", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9894818398132997, "res": {"Yes": 0.9894818398132997, "No": 0.010518033697779991}, "ground_truth": 0}, {"key": "36962388", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00016524984784323707, "res": {"No": 0.999834495597365, "Yes": 0.00016524984784323707}, "ground_truth": 0}, {"key": "36962388", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999570131939592, "res": {"Yes": 0.9999570131939592, "No": 4.288647713587499e-05}, "ground_truth": 0}, {"key": "36962388", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997901638386876, "res": {"Yes": 0.9997901638386876, "No": 0.00020972466637642873}, "ground_truth": 1}, {"key": "36962388", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994743884321823, "res": {"Yes": 0.9994743884321823, "No": 0.0005255511691897108}, "ground_truth": 0}, {"key": "36962388", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997666895911075, "res": {"Yes": 0.9997666895911075, "No": 0.00023326672887861432}, "ground_truth": 0}, {"key": "32282272", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966460092353775, "res": {"Yes": 0.9966460092353775, "No": 0.0033539530965870363}, "ground_truth": 0}, {"key": "32282272", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972260803447237, "res": {"Yes": 0.9972260803447237, "No": 0.002773825124478655}, "ground_truth": 1}, {"key": "32282272", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7746510105624433, "res": {"Yes": 0.7746510105624433, "No": 0.22534858702510865}, "ground_truth": 0}, {"key": "32282272", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.041745024852315946, "res": {"No": 0.9582547651221989, "Yes": 0.041745024852315946}, "ground_truth": 0}, {"key": "36093072", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9811835538499948, "res": {"Yes": 0.9811835538499948, "No": 0.01881650253062614}, "ground_truth": 0}, {"key": "36093072", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999826412106655, "res": {"Yes": 0.9999826412106655, "No": 1.726493486138972e-05}, "ground_truth": 0}, {"key": "36093072", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.5878684846124534e-06}, "ground_truth": 1}, {"key": "36093072", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999450934134217, "res": {"Yes": 0.9999450934134217, "No": 5.487684858160655e-05}, "ground_truth": 0}, {"key": "36093072", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9846536155468444, "res": {"Yes": 0.9846536155468444, "No": 0.015346307310205348}, "ground_truth": 0}, {"key": "38879972", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.039567883994829384, "res": {"No": 0.9604319027465553, "Yes": 0.039567883994829384}, "ground_truth": 0}, {"key": "38879972", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996348072491118, "res": {"Yes": 0.9996348072491118, "No": 0.00036506797092739643}, "ground_truth": 0}, {"key": "38879972", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963110199671954, "res": {"Yes": 0.9963110199671954, "No": 0.0036889109499708924}, "ground_truth": 1}, {"key": "38879972", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9727035650558615, "res": {"Yes": 0.9727035650558615, "No": 0.027295732709947312}, "ground_truth": 0}, {"key": "38879972", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996776985102879, "res": {"Yes": 0.9996776985102879, "No": 0.00032218520724232617}, "ground_truth": 0}, {"key": "32106473", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997371490813575, "res": {"Yes": 0.9997371490813575, "No": 0.00026277169043668636}, "ground_truth": 0}, {"key": "32106473", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9483708198793892, "res": {"Yes": 0.9483708198793892, "No": 0.05162911610664272}, "ground_truth": 0}, {"key": "32106473", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999280483736425, "res": {"Yes": 0.9999280483736425, "No": 7.184525413655411e-05}, "ground_truth": 1}, {"key": "32106473", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999987886094374, "res": {"Yes": 0.999987886094374, "No": 1.2027931354451245e-05}, "ground_truth": 0}, {"key": "32106473", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995962221402301, "res": {"Yes": 0.9995962221402301, "No": 0.00040373872884114277}, "ground_truth": 0}, {"key": "40415815", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995627509912224, "res": {"Yes": 0.9995627509912224, "No": 0.00043717105904451085}, "ground_truth": 0}, {"key": "40415815", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997597774080685, "res": {"Yes": 0.9997597774080685, "No": 0.00024012882687273653}, "ground_truth": 1}, {"key": "40415815", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998681023692575, "res": {"Yes": 0.9998681023692575, "No": 0.00013185535917391398}, "ground_truth": 0}, {"key": "40415815", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999474773593622, "res": {"Yes": 0.9999474773593622, "No": 5.2414777135024955e-05}, "ground_truth": 0}, {"key": "34581918", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.455797339125105, "res": {"No": 0.5442023142877946, "Yes": 0.455797339125105}, "ground_truth": 0}, {"key": "34581918", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.577374482936196e-07}, "ground_truth": 0}, {"key": "34581918", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.3342129373611667e-06}, "ground_truth": 1}, {"key": "34581918", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999905085465441, "res": {"Yes": 0.9999905085465441, "No": 9.416585580475156e-06}, "ground_truth": 0}, {"key": "34581918", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998988460004109, "res": {"Yes": 0.9998988460004109, "No": 0.00010110196458800422}, "ground_truth": 0}, {"key": "33004157", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.563300916661755e-06}, "ground_truth": 0}, {"key": "33004157", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999244725263433, "res": {"Yes": 0.9999244725263433, "No": 7.546112009339821e-05}, "ground_truth": 0}, {"key": "33004157", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.1403104076016175e-07}, "ground_truth": 1}, {"key": "33004157", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999628539429318, "res": {"Yes": 0.9999628539429318, "No": 3.7060753700851683e-05}, "ground_truth": 0}, {"key": "33004157", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.639587816157582e-07}, "ground_truth": 0}, {"key": "30334943", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9990483777299359, "res": {"Yes": 0.9990483777299359, "No": 0.0009515385091527598}, "ground_truth": 0}, {"key": "30334943", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999963211539712, "res": {"Yes": 0.999963211539712, "No": 3.66553926977017e-05}, "ground_truth": 0}, {"key": "30334943", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9183186181359659, "res": {"Yes": 0.9183186181359659, "No": 0.08168127463661479}, "ground_truth": 1}, {"key": "30334943", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6832889970422094, "res": {"Yes": 0.6832889970422094, "No": 0.31671087105264556}, "ground_truth": 0}, {"key": "30334943", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.363272404691071, "res": {"No": 0.6367268005144346, "Yes": 0.363272404691071}, "ground_truth": 0}, {"key": "33280503", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9991941493721612, "res": {"Yes": 0.9991941493721612, "No": 0.0008057301282493233}, "ground_truth": 0}, {"key": "33280503", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997098663169197, "res": {"Yes": 0.9997098663169197, "No": 0.0002900375965821522}, "ground_truth": 0}, {"key": "33280503", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996426718769061, "res": {"Yes": 0.9996426718769061, "No": 0.0003572251509666178}, "ground_truth": 1}, {"key": "33280503", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999896223785124, "res": {"Yes": 0.999896223785124, "No": 0.0001037352061858796}, "ground_truth": 0}, {"key": "33280503", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970307561500217, "res": {"Yes": 0.9970307561500217, "No": 0.002969106539133797}, "ground_truth": 0}, {"key": "25726782", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.148052920231333, "res": {"No": 0.8519467705370523, "Yes": 0.148052920231333}, "ground_truth": 0}, {"key": "25726782", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.830585451684229e-07}, "ground_truth": 0}, {"key": "25726782", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.0225766136431617e-07}, "ground_truth": 1}, {"key": "25726782", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 7.381137385045396e-08}, "ground_truth": 0}, {"key": "25726782", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.3034996603960333e-06}, "ground_truth": 0}, {"key": "35479854", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998085175151116, "res": {"Yes": 0.9998085175151116, "No": 0.00019135523274635906}, "ground_truth": 0}, {"key": "35479854", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7506167027481176, "res": {"Yes": 0.7506167027481176, "No": 0.249382878214357}, "ground_truth": 0}, {"key": "35479854", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961273983374132, "res": {"Yes": 0.9961273983374132, "No": 0.003872583204163509}, "ground_truth": 1}, {"key": "35479854", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999731051276736, "res": {"Yes": 0.9999731051276736, "No": 2.6841778097798842e-05}, "ground_truth": 0}, {"key": "35479854", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957024574783523, "res": {"Yes": 0.9957024574783523, "No": 0.004297514270763185}, "ground_truth": 0}, {"key": "32716226", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998331845852388, "res": {"Yes": 0.9998331845852388, "No": 0.00016677082390100033}, "ground_truth": 0}, {"key": "32716226", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991353632139922, "res": {"Yes": 0.9991353632139922, "No": 0.0008645958375928194}, "ground_truth": 0}, {"key": "32716226", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999154137776163, "res": {"Yes": 0.9999154137776163, "No": 8.452620105555978e-05}, "ground_truth": 1}, {"key": "32716226", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999795419732683, "res": {"Yes": 0.9999795419732683, "No": 2.0331055770900314e-05}, "ground_truth": 0}, {"key": "32716226", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999826412106655, "res": {"Yes": 0.9999826412106655, "No": 1.7302388541640378e-05}, "ground_truth": 0}, {"key": "37047554", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5612653110671254, "res": {"Yes": 0.5612653110671254, "No": 0.4387342948747449}, "ground_truth": 0}, {"key": "37047554", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9722219674676548, "res": {"Yes": 0.9722219674676548, "No": 0.027777895512919235}, "ground_truth": 0}, {"key": "37047554", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9959120505072663, "res": {"Yes": 0.9959120505072663, "No": 0.004087973125906375}, "ground_truth": 1}, {"key": "37047554", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986028815799238, "res": {"Yes": 0.9986028815799238, "No": 0.0013970973724874436}, "ground_truth": 0}, {"key": "37047554", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9801786558938476, "res": {"Yes": 0.9801786558938476, "No": 0.019821291316655686}, "ground_truth": 0}, {"key": "36565290", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7943212758829248, "res": {"Yes": 0.7943212758829248, "No": 0.2056785635409359}, "ground_truth": 0}, {"key": "36565290", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999464045822857, "res": {"Yes": 0.9999464045822857, "No": 5.3534620564218243e-05}, "ground_truth": 0}, {"key": "36565290", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996162331373478, "res": {"Yes": 0.9996162331373478, "No": 0.0003837178657608697}, "ground_truth": 1}, {"key": "36565290", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993300805962723, "res": {"Yes": 0.9993300805962723, "No": 0.0006698357666701918}, "ground_truth": 0}, {"key": "36565290", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999851444463448, "res": {"Yes": 0.9999851444463448, "No": 1.4768190216074401e-05}, "ground_truth": 0}, {"key": "27758640", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995716873820494, "res": {"Yes": 0.9995716873820494, "No": 0.00042830414311282363}, "ground_truth": 0}, {"key": "27758640", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999926541946805, "res": {"Yes": 0.9999926541946805, "No": 7.277086368309815e-06}, "ground_truth": 1}, {"key": "27758640", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999682179220609, "res": {"Yes": 0.9999682179220609, "No": 3.1675010508407e-05}, "ground_truth": 0}, {"key": "27758640", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961843067289412, "res": {"Yes": 0.9961843067289412, "No": 0.0038156143188031208}, "ground_truth": 0}, {"key": "28897118", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999609467545978, "res": {"Yes": 0.9999609467545978, "No": 3.893180625149197e-05}, "ground_truth": 0}, {"key": "28897118", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.7374695879521776e-07}, "ground_truth": 0}, {"key": "28897118", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 3.026672235760466e-08}, "ground_truth": 1}, {"key": "28897118", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.564978751822213e-07}, "ground_truth": 0}, {"key": "28897118", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999922965856715, "res": {"Yes": 0.9999922965856715, "No": 7.658480209250011e-06}, "ground_truth": 0}, {"key": "38452661", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999797803764193, "res": {"Yes": 0.9999797803764193, "No": 2.011489444807208e-05}, "ground_truth": 0}, {"key": "38452661", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993730632070489, "res": {"Yes": 0.9993730632070489, "No": 0.0006268166401828501}, "ground_truth": 1}, {"key": "38452661", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.447770168514832e-06}, "ground_truth": 0}, {"key": "38452661", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997780113531267, "res": {"Yes": 0.9997780113531267, "No": 0.00022188107895563757}, "ground_truth": 0}, {"key": "38033492", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6000270148677453, "res": {"Yes": 0.6000270148677453, "No": 0.3999726136872175}, "ground_truth": 0}, {"key": "38033492", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997957652387589, "res": {"Yes": 0.9997957652387589, "No": 0.00020409683405565851}, "ground_truth": 0}, {"key": "38033492", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999962302846054, "res": {"Yes": 0.9999962302846054, "No": 3.7248610829348895e-06}, "ground_truth": 1}, {"key": "38033492", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 2.0150511062178313e-06}, "ground_truth": 0}, {"key": "38033492", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 3.096042112811528e-06}, "ground_truth": 0}, {"key": "35949555", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00028014254361365793, "res": {"No": 0.9997197537263252, "Yes": 0.00028014254361365793}, "ground_truth": 0}, {"key": "35949555", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.01520805130877626, "res": {"No": 0.9847915224958814, "Yes": 0.01520805130877626}, "ground_truth": 0}, {"key": "35949555", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.43341070072603854, "res": {"No": 0.5665888333225085, "Yes": 0.43341070072603854}, "ground_truth": 1}, {"key": "35949555", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5772964305162086, "res": {"Yes": 0.5772964305162086, "No": 0.42270241844666884}, "ground_truth": 0}, {"key": "35949555", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.90805901318343, "res": {"Yes": 0.90805901318343, "No": 0.09194077125149169}, "ground_truth": 0}, {"key": "15263826", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0038860405235237806, "res": {"No": 0.9961137906328082, "Yes": 0.0038860405235237806}, "ground_truth": 0}, {"key": "15263826", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9927997350135145, "res": {"Yes": 0.9927997350135145, "No": 0.007200157343032119}, "ground_truth": 0}, {"key": "15263826", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990458805017415, "res": {"Yes": 0.9990458805017415, "No": 0.0009540186857192458}, "ground_truth": 1}, {"key": "15263826", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990018658675924, "res": {"Yes": 0.9990018658675924, "No": 0.000998128033301491}, "ground_truth": 0}, {"key": "15263826", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9645324864853587, "res": {"Yes": 0.9645324864853587, "No": 0.035467446613583635}, "ground_truth": 0}, {"key": "37313866", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00016507428058550925, "res": {"No": 0.9998348531582413, "Yes": 0.00016507428058550925}, "ground_truth": 0}, {"key": "37313866", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0004572307367802867, "res": {"No": 0.9995426182309467, "Yes": 0.0004572307367802867}, "ground_truth": 0}, {"key": "37313866", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 7.919868150029995e-06, "res": {"No": 0.9999919389784903, "Yes": 7.919868150029995e-06}, "ground_truth": 1}, {"key": "37313866", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9292763723078064, "res": {"Yes": 0.9292763723078064, "No": 0.07072363939100433}, "ground_truth": 0}, {"key": "37313866", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.1433342447875428, "res": {"No": 0.8566656984291211, "Yes": 0.1433342447875428}, "ground_truth": 0}, {"key": "13911157", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999985978860297, "res": {"Yes": 0.999985978860297, "No": 1.3997945347672256e-05}, "ground_truth": 0}, {"key": "13911157", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999640459343629, "res": {"Yes": 0.9999640459343629, "No": 3.584182680625002e-05}, "ground_truth": 0}, {"key": "13911157", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998913369602558, "res": {"Yes": 0.9998913369602558, "No": 0.0001086297768219163}, "ground_truth": 1}, {"key": "13911157", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994861797508597, "res": {"Yes": 0.9994861797508597, "No": 0.0005137159868450115}, "ground_truth": 0}, {"key": "13911157", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9826950330671903, "res": {"Yes": 0.9826950330671903, "No": 0.017305006954791424}, "ground_truth": 0}, {"key": "39594894", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7588484668028589, "res": {"Yes": 0.7588484668028589, "No": 0.24115131004301307}, "ground_truth": 0}, {"key": "39594894", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998570179529074, "res": {"Yes": 0.9998570179529074, "No": 0.0001429273641154274}, "ground_truth": 0}, {"key": "39594894", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999818067994983, "res": {"Yes": 0.9999818067994983, "No": 1.80876169542738e-05}, "ground_truth": 1}, {"key": "39594894", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999568939990904, "res": {"Yes": 0.9999568939990904, "No": 4.298144702126927e-05}, "ground_truth": 0}, {"key": "39594894", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.907554417756686e-06}, "ground_truth": 0}, {"key": "34096170", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998163834895752, "res": {"Yes": 0.9998163834895752, "No": 0.00018352530464577516}, "ground_truth": 0}, {"key": "34096170", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999313858390593, "res": {"Yes": 0.9999313858390593, "No": 6.851546991626854e-05}, "ground_truth": 0}, {"key": "34096170", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995526268112412, "res": {"Yes": 0.9995526268112412, "No": 0.0004473528057748775}, "ground_truth": 1}, {"key": "34096170", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996070579919407, "res": {"Yes": 0.9996070579919407, "No": 0.00039292177960063634}, "ground_truth": 0}, {"key": "34096170", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999547484278832, "res": {"Yes": 0.9999547484278832, "No": 4.515972085257858e-05}, "ground_truth": 0}, {"key": "37891952", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997479901398207, "res": {"Yes": 0.9997479901398207, "No": 0.0002519764316563038}, "ground_truth": 0}, {"key": "37891952", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997091513046849, "res": {"Yes": 0.9997091513046849, "No": 0.00029082379199149355}, "ground_truth": 0}, {"key": "37891952", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99986464592119, "res": {"Yes": 0.99986464592119, "No": 0.00013528119768972956}, "ground_truth": 1}, {"key": "37891952", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.665978365830362e-06}, "ground_truth": 0}, {"key": "37891952", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998924096782539, "res": {"Yes": 0.9998924096782539, "No": 0.00010742904229652473}, "ground_truth": 0}, {"key": "40186158", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.350854899557943e-07, "res": {"No": 0.9999998063873687, "Yes": 1.350854899557943e-07}, "ground_truth": 0}, {"key": "40186158", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997303562804042, "res": {"Yes": 0.9997303562804042, "No": 0.00026957147214793324}, "ground_truth": 0}, {"key": "40186158", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998751344763485, "res": {"Yes": 0.9998751344763485, "No": 0.00012472278542964774}, "ground_truth": 1}, {"key": "40186158", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996879430998862, "res": {"Yes": 0.9996879430998862, "No": 0.0003120138437954182}, "ground_truth": 0}, {"key": "40186158", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967328076268279, "res": {"Yes": 0.9967328076268279, "No": 0.003267171346761806}, "ground_truth": 0}, {"key": "37049719", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0005800982918935721, "res": {"No": 0.9994198526505887, "Yes": 0.0005800982918935721}, "ground_truth": 0}, {"key": "37049719", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9875144424973864, "res": {"Yes": 0.9875144424973864, "No": 0.012485435175385282}, "ground_truth": 0}, {"key": "37049719", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999957132395842, "res": {"Yes": 0.999957132395842, "No": 4.284541156432827e-05}, "ground_truth": 1}, {"key": "37049719", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999368688428554, "res": {"Yes": 0.9999368688428554, "No": 6.307279641479153e-05}, "ground_truth": 0}, {"key": "37049719", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995159401947685, "res": {"Yes": 0.9995159401947685, "No": 0.0004840219020994038}, "ground_truth": 0}, {"key": "34610504", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9911278211318817, "res": {"Yes": 0.9911278211318817, "No": 0.00887207655623338}, "ground_truth": 0}, {"key": "34610504", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999773963544663, "res": {"Yes": 0.9999773963544663, "No": 2.2504798608593216e-05}, "ground_truth": 0}, {"key": "34610504", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997491818801253, "res": {"Yes": 0.9997491818801253, "No": 0.00025075228963935235}, "ground_truth": 1}, {"key": "34610504", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "No": 4.918116135860622e-06}, "ground_truth": 0}, {"key": "34610504", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999391335724361, "res": {"Yes": 0.9999391335724361, "No": 6.069689587523172e-05}, "ground_truth": 0}, {"key": "37595429", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7738275761819869, "res": {"Yes": 0.7738275761819869, "No": 0.22617209210262562}, "ground_truth": 0}, {"key": "37595429", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999722707254635, "res": {"Yes": 0.9999722707254635, "No": 2.7618478455645348e-05}, "ground_truth": 0}, {"key": "37595429", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99995629800496, "res": {"Yes": 0.99995629800496, "No": 4.3598535396364726e-05}, "ground_truth": 1}, {"key": "37595429", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999497421129699, "res": {"Yes": 0.9999497421129699, "No": 5.018471799614704e-05}, "ground_truth": 0}, {"key": "37595429", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.42950027438414e-06}, "ground_truth": 0}, {"key": "29772670", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9989074087129792, "res": {"Yes": 0.9989074087129792, "No": 0.0010923685047756562}, "ground_truth": 0}, {"key": "29772670", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9248802678592255, "res": {"Yes": 0.9248802678592255, "No": 0.07511937981800965}, "ground_truth": 0}, {"key": "29772670", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8880506636123069, "res": {"Yes": 0.8880506636123069, "No": 0.11194873899529095}, "ground_truth": 1}, {"key": "29772670", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9459188744527408, "res": {"Yes": 0.9459188744527408, "No": 0.054080716415886194}, "ground_truth": 0}, {"key": "29772670", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9737315999963487, "res": {"Yes": 0.9737315999963487, "No": 0.026266965341021197}, "ground_truth": 0}, {"key": "36369872", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999030177130861, "res": {"Yes": 0.9999030177130861, "No": 9.685456833931553e-05}, "ground_truth": 0}, {"key": "36369872", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998965813680755, "res": {"Yes": 0.9998965813680755, "No": 0.00010338527646339138}, "ground_truth": 0}, {"key": "36369872", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998794179605985, "res": {"Yes": 0.9998794179605985, "No": 0.00012056180885313548}, "ground_truth": 1}, {"key": "36369872", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998322311147737, "res": {"Yes": 0.9998322311147737, "No": 0.00016769830441653008}, "ground_truth": 0}, {"key": "36369872", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9811702363359979, "res": {"Yes": 0.9811702363359979, "No": 0.018829744597507327}, "ground_truth": 0}, {"key": "34527433", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999434246564451, "res": {"Yes": 0.9999434246564451, "No": 5.652186519913778e-05}, "ground_truth": 0}, {"key": "34527433", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.828702743935604, "res": {"Yes": 0.828702743935604, "No": 0.17129715193027492}, "ground_truth": 1}, {"key": "34527433", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992821078079926, "res": {"Yes": 0.9992821078079926, "No": 0.0007178355888684279}, "ground_truth": 0}, {"key": "34527433", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991542784242525, "res": {"Yes": 0.9991542784242525, "No": 0.0008457087709491078}, "ground_truth": 0}, {"key": "31111734", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.754855801020681, "res": {"Yes": 0.754855801020681, "No": 0.2451434082220559}, "ground_truth": 0}, {"key": "31111734", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9743653324649473, "res": {"Yes": 0.9743653324649473, "No": 0.02563447894178644}, "ground_truth": 0}, {"key": "31111734", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9929552112853212, "res": {"Yes": 0.9929552112853212, "No": 0.007044458497245814}, "ground_truth": 1}, {"key": "31111734", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983560418819615, "res": {"Yes": 0.9983560418819615, "No": 0.0016438750317303138}, "ground_truth": 0}, {"key": "31111734", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9920832696903703, "res": {"Yes": 0.9920832696903703, "No": 0.007916511004123028}, "ground_truth": 0}, {"key": "40303872", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 8.163790294161507e-06, "res": {"No": 0.9999917005724405, "Yes": 8.163790294161507e-06}, "ground_truth": 0}, {"key": "40303872", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9948489344733228, "res": {"Yes": 0.9948489344733228, "No": 0.005150996952662549}, "ground_truth": 0}, {"key": "40303872", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996911606007087, "res": {"Yes": 0.9996911606007087, "No": 0.00030880455168800174}, "ground_truth": 1}, {"key": "40303872", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997005711677635, "res": {"Yes": 0.9997005711677635, "No": 0.0002993898463142138}, "ground_truth": 0}, {"key": "40303872", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9852694420384687, "res": {"Yes": 0.9852694420384687, "No": 0.014730439675078708}, "ground_truth": 0}, {"key": "33653553", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.985249418661493e-06}, "ground_truth": 0}, {"key": "33653553", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997874227277509, "res": {"Yes": 0.9997874227277509, "No": 0.00021243191944807975}, "ground_truth": 1}, {"key": "33653553", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.944917292011115, "res": {"Yes": 0.944917292011115, "No": 0.055082462413143116}, "ground_truth": 0}, {"key": "33653553", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9794736484717664, "res": {"Yes": 0.9794736484717664, "No": 0.020526250495877807}, "ground_truth": 0}, {"key": "34404510", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9503702675563048, "res": {"Yes": 0.9503702675563048, "No": 0.04962947836080914}, "ground_truth": 0}, {"key": "34404510", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.3450074009865225e-06}, "ground_truth": 0}, {"key": "34404510", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.4008897324833765e-06}, "ground_truth": 1}, {"key": "34404510", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999951574563252, "res": {"Yes": 0.9999951574563252, "No": 4.671646401093652e-06}, "ground_truth": 0}, {"key": "34404510", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989112152566944, "res": {"Yes": 0.9989112152566944, "No": 0.0010887699567499957}, "ground_truth": 0}, {"key": "35568692", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00886590728570191, "res": {"No": 0.9911264147225013, "Yes": 0.00886590728570191}, "ground_truth": 0}, {"key": "35568692", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.05964903549343719, "res": {"No": 0.9403500870708188, "Yes": 0.05964903549343719}, "ground_truth": 0}, {"key": "35568692", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993053214409541, "res": {"Yes": 0.9993053214409541, "No": 0.0006946312005033985}, "ground_truth": 1}, {"key": "35568692", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989471433429107, "res": {"Yes": 0.9989471433429107, "No": 0.001052564196466873}, "ground_truth": 0}, {"key": "35568692", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9950817722843965, "res": {"Yes": 0.9950817722843965, "No": 0.004917895420767189}, "ground_truth": 0}, {"key": "39151664", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9890880865960131, "res": {"Yes": 0.9890880865960131, "No": 0.010911587292842589}, "ground_truth": 0}, {"key": "39151664", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.169393799525332, "res": {"No": 0.830604599697359, "Yes": 0.169393799525332}, "ground_truth": 0}, {"key": "39151664", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9958904135571, "res": {"Yes": 0.9958904135571, "No": 0.004109306497963832}, "ground_truth": 1}, {"key": "39151664", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6229271694486739, "res": {"Yes": 0.6229271694486739, "No": 0.37706850393146935}, "ground_truth": 0}, {"key": "39151664", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0016490844893467134, "res": {"No": 0.9983502250772646, "Yes": 0.0016490844893467134}, "ground_truth": 0}, {"key": "37493670", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9981346202873852, "res": {"Yes": 0.9981346202873852, "No": 0.001865127805156989}, "ground_truth": 0}, {"key": "37493670", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999881244990143, "res": {"Yes": 0.9999881244990143, "No": 1.1799221538292541e-05}, "ground_truth": 0}, {"key": "37493670", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999931310055916, "res": {"Yes": 0.9999931310055916, "No": 6.717745169422202e-06}, "ground_truth": 1}, {"key": "37493670", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999607083559369, "res": {"Yes": 0.9999607083559369, "No": 3.916747010262707e-05}, "ground_truth": 0}, {"key": "37493670", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999909853566321, "res": {"Yes": 0.9999909853566321, "No": 8.862452728490745e-06}, "ground_truth": 0}, {"key": "21935983", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999765619466755, "res": {"Yes": 0.9999765619466755, "No": 2.338181440329752e-05}, "ground_truth": 0}, {"key": "21935983", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981236961630001, "res": {"Yes": 0.9981236961630001, "No": 0.0018762681528377144}, "ground_truth": 0}, {"key": "21935983", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.913059535714873e-06}, "ground_truth": 1}, {"key": "21935983", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997645444240161, "res": {"Yes": 0.9997645444240161, "No": 0.0002353407721349778}, "ground_truth": 0}, {"key": "21935983", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977883183799597, "res": {"Yes": 0.9977883183799597, "No": 0.0022113493149743544}, "ground_truth": 0}, {"key": "38174214", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999759659438225, "res": {"Yes": 0.9999759659438225, "No": 2.3907627683669225e-05}, "ground_truth": 0}, {"key": "38174214", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999776581231691, "res": {"Yes": 0.999776581231691, "No": 0.00022339166093058145}, "ground_truth": 0}, {"key": "38174214", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.918537723939709e-06}, "ground_truth": 1}, {"key": "38174214", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99904457051368, "res": {"Yes": 0.99904457051368, "No": 0.0009552882791984473}, "ground_truth": 0}, {"key": "38174214", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962485412696138, "res": {"Yes": 0.9962485412696138, "No": 0.0037514156633957985}, "ground_truth": 0}, {"key": "40319923", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.12383583522441968, "res": {"No": 0.8761637817969395, "Yes": 0.12383583522441968}, "ground_truth": 0}, {"key": "40319923", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.23393387243358602, "res": {"No": 0.7660658476090888, "Yes": 0.23393387243358602}, "ground_truth": 0}, {"key": "40319923", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962500776852988, "res": {"Yes": 0.9962500776852988, "No": 0.0037497335307172153}, "ground_truth": 1}, {"key": "40319923", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6805742308724272, "res": {"Yes": 0.6805742308724272, "No": 0.3194256307068025}, "ground_truth": 0}, {"key": "40319923", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7941008028998056, "res": {"Yes": 0.7941008028998056, "No": 0.20589857207782553}, "ground_truth": 0}, {"key": "36478264", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8567559786172011, "res": {"Yes": 0.8567559786172011, "No": 0.1432421964377868}, "ground_truth": 0}, {"key": "36478264", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998449838462458, "res": {"Yes": 0.9998449838462458, "No": 0.00015496504230463806}, "ground_truth": 0}, {"key": "36478264", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999912237625114, "res": {"Yes": 0.9999912237625114, "No": 8.688649841287239e-06}, "ground_truth": 1}, {"key": "36478264", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999908661547138, "res": {"Yes": 0.9999908661547138, "No": 9.015339883707563e-06}, "ground_truth": 0}, {"key": "36478264", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997724100525223, "res": {"Yes": 0.9997724100525223, "No": 0.00022754216410971536}, "ground_truth": 0}, {"key": "11935769", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.005956632347304909, "res": {"No": 0.9940432734651303, "Yes": 0.005956632347304909}, "ground_truth": 0}, {"key": "11935769", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999726283256111, "res": {"Yes": 0.9999726283256111, "No": 2.7272764257473532e-05}, "ground_truth": 0}, {"key": "11935769", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999475965531086, "res": {"Yes": 0.9999475965531086, "No": 5.232187574828147e-05}, "ground_truth": 1}, {"key": "11935769", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999866940725246, "res": {"Yes": 0.9999866940725246, "No": 1.3224002611020216e-05}, "ground_truth": 0}, {"key": "11935769", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997811099699191, "res": {"Yes": 0.9997811099699191, "No": 0.000218774508531786}, "ground_truth": 0}, {"key": "33373410", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 9.195403539669257e-08, "res": {"No": 0.9999996871837189, "Yes": 9.195403539669257e-08}, "ground_truth": 0}, {"key": "33373410", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9830064011546176, "res": {"Yes": 0.9830064011546176, "No": 0.016993562213022055}, "ground_truth": 0}, {"key": "33373410", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968053016433704, "res": {"Yes": 0.9968053016433704, "No": 0.0031945948714201685}, "ground_truth": 1}, {"key": "33373410", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9940357332778815, "res": {"Yes": 0.9940357332778815, "No": 0.005964160326877954}, "ground_truth": 0}, {"key": "33373410", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8806688353708679, "res": {"Yes": 0.8806688353708679, "No": 0.11933070579137356}, "ground_truth": 0}, {"key": "11130680", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.005968525303086703, "res": {"No": 0.9940313787150828, "Yes": 0.005968525303086703}, "ground_truth": 0}, {"key": "11130680", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0036869073089901296, "res": {"No": 0.996313035307165, "Yes": 0.0036869073089901296}, "ground_truth": 0}, {"key": "11130680", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9852887722292709, "res": {"Yes": 0.9852887722292709, "No": 0.014711187797618246}, "ground_truth": 1}, {"key": "11130680", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.018264417697975096, "res": {"No": 0.981735544352131, "Yes": 0.018264417697975096}, "ground_truth": 0}, {"key": "11130680", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971318477304355, "res": {"Yes": 0.9971318477304355, "No": 0.002868161634264487}, "ground_truth": 0}, {"key": "34868650", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8598273291085026, "res": {"Yes": 0.8598273291085026, "No": 0.14017246803797379}, "ground_truth": 0}, {"key": "34868650", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999839524287637, "res": {"Yes": 0.9999839524287637, "No": 1.5939472149299202e-05}, "ground_truth": 0}, {"key": "34868650", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999865748701179, "res": {"Yes": 0.9999865748701179, "No": 1.329507356501876e-05}, "ground_truth": 1}, {"key": "34868650", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999619003488102, "res": {"Yes": 0.9999619003488102, "No": 3.803042730839739e-05}, "ground_truth": 0}, {"key": "34868650", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 6.52296846235174e-06}, "ground_truth": 0}, {"key": "33960561", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.021199232516740975, "res": {"No": 0.9788007217807179, "Yes": 0.021199232516740975}, "ground_truth": 0}, {"key": "33960561", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993013905113217, "res": {"Yes": 0.9993013905113217, "No": 0.0006985743098706243}, "ground_truth": 0}, {"key": "33960561", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.70430299268203e-06}, "ground_truth": 1}, {"key": "33960561", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997658553661344, "res": {"Yes": 0.9997658553661344, "No": 0.000234037546581893}, "ground_truth": 0}, {"key": "33960561", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969998245482533, "res": {"Yes": 0.9969998245482533, "No": 0.0030001184732813403}, "ground_truth": 0}, {"key": "22504858", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.870491123320529e-05, "res": {"No": 0.9999211350800014, "Yes": 7.870491123320529e-05}, "ground_truth": 0}, {"key": "22504858", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999038520625092, "res": {"Yes": 0.9999038520625092, "No": 9.609502618376793e-05}, "ground_truth": 0}, {"key": "22504858", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999146986083706, "res": {"Yes": 0.9999146986083706, "No": 8.522651189982314e-05}, "ground_truth": 1}, {"key": "22504858", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995902644045653, "res": {"Yes": 0.9995902644045653, "No": 0.0004096393579950959}, "ground_truth": 0}, {"key": "22504858", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9922765423667339, "res": {"Yes": 0.9922765423667339, "No": 0.00772328104041108}, "ground_truth": 0}, {"key": "32283530", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.159631788451589e-06, "res": {"No": 0.9999956342685299, "Yes": 4.159631788451589e-06}, "ground_truth": 0}, {"key": "32283530", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986579313806563, "res": {"Yes": 0.9986579313806563, "No": 0.0013420821871771753}, "ground_truth": 0}, {"key": "32283530", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.005526381597184484, "res": {"No": 0.9944736445956561, "Yes": 0.005526381597184484}, "ground_truth": 1}, {"key": "32283530", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99937675252944, "res": {"Yes": 0.99937675252944, "No": 0.0006231784148495212}, "ground_truth": 0}, {"key": "32283530", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.027747725669618705, "res": {"No": 0.9722521670954363, "Yes": 0.027747725669618705}, "ground_truth": 0}, {"key": "38377099", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.1229674355787724e-07, "res": {"No": 0.9999994487765019, "Yes": 3.1229674355787724e-07}, "ground_truth": 0}, {"key": "38377099", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999936078174301, "res": {"Yes": 0.9999936078174301, "No": 6.3208301852919556e-06}, "ground_truth": 0}, {"key": "38377099", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.4049630941309217e-06}, "ground_truth": 1}, {"key": "38377099", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.0892055289531344e-06}, "ground_truth": 0}, {"key": "38377099", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994133002756301, "res": {"Yes": 0.9994133002756301, "No": 0.0005866861787032459}, "ground_truth": 0}, {"key": "36105123", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998795371462323, "res": {"Yes": 0.9998795371462323, "No": 0.00012032983027636378}, "ground_truth": 0}, {"key": "36105123", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 2.9233974931773092e-05, "res": {"No": 0.9999706019221319, "Yes": 2.9233974931773092e-05}, "ground_truth": 0}, {"key": "36105123", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9023206267972349, "res": {"Yes": 0.9023206267972349, "No": 0.09767914522361193}, "ground_truth": 1}, {"key": "36105123", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5677679400459287, "res": {"Yes": 0.5677679400459287, "No": 0.4322317656671148}, "ground_truth": 0}, {"key": "36105123", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 7.318706157894122e-06, "res": {"No": 0.9999924157887603, "Yes": 7.318706157894122e-06}, "ground_truth": 0}, {"key": "33527826", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9481186114408731, "res": {"Yes": 0.9481186114408731, "No": 0.05188129284152685}, "ground_truth": 0}, {"key": "33527826", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999789459686392, "res": {"Yes": 0.9999789459686392, "No": 2.093812286594965e-05}, "ground_truth": 0}, {"key": "33527826", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999972151525776, "res": {"Yes": 0.999972151525776, "No": 2.7722145157571086e-05}, "ground_truth": 1}, {"key": "33527826", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999619003488102, "res": {"Yes": 0.9999619003488102, "No": 3.805595428846532e-05}, "ground_truth": 0}, {"key": "33527826", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995336894065956, "res": {"Yes": 0.9995336894065956, "No": 0.0004661663561952805}, "ground_truth": 0}, {"key": "32349891", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988258228860326, "res": {"Yes": 0.9988258228860326, "No": 0.00117414471331282}, "ground_truth": 0}, {"key": "32349891", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.986206739903291, "res": {"Yes": 0.986206739903291, "No": 0.013793130508449011}, "ground_truth": 1}, {"key": "32349891", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.5853045880189755e-06}, "ground_truth": 0}, {"key": "32349891", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999889589149532, "res": {"Yes": 0.9999889589149532, "No": 1.102202515777931e-05}, "ground_truth": 0}, {"key": "34281974", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9398279635197693, "res": {"Yes": 0.9398279635197693, "No": 0.06017179247594864}, "ground_truth": 0}, {"key": "34281974", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.7072126359789044e-06}, "ground_truth": 0}, {"key": "34281974", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.7414901433719514e-07}, "ground_truth": 1}, {"key": "34281974", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.7600877239446816e-07}, "ground_truth": 0}, {"key": "34281974", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.1499548157583181e-07}, "ground_truth": 0}, {"key": "29387866", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9720465594501693, "res": {"Yes": 0.9720465594501693, "No": 0.027953255788312176}, "ground_truth": 0}, {"key": "29387866", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999824028078323, "res": {"Yes": 0.9999824028078323, "No": 1.7501201889379723e-05}, "ground_truth": 0}, {"key": "29387866", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996130158676828, "res": {"Yes": 0.9996130158676828, "No": 0.0003869682150349966}, "ground_truth": 1}, {"key": "29387866", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999783499623655, "res": {"Yes": 0.9999783499623655, "No": 2.1579516573152714e-05}, "ground_truth": 0}, {"key": "29387866", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999891973193493, "res": {"Yes": 0.9999891973193493, "No": 1.0666719401012027e-05}, "ground_truth": 0}, {"key": "35731925", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4053279445128044, "res": {"No": 0.5946666812421205, "Yes": 0.4053279445128044}, "ground_truth": 0}, {"key": "35731925", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9842946369054348, "res": {"Yes": 0.9842946369054348, "No": 0.01570180979035985}, "ground_truth": 0}, {"key": "35731925", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2291415142805104, "res": {"No": 0.7708557173199041, "Yes": 0.2291415142805104}, "ground_truth": 1}, {"key": "35731925", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9233760832425365, "res": {"Yes": 0.9233760832425365, "No": 0.07662094547438558}, "ground_truth": 0}, {"key": "35731925", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7105742734735486, "res": {"Yes": 0.7105742734735486, "No": 0.2894214351424925}, "ground_truth": 0}, {"key": "38829733", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0003711967175123006, "res": {"No": 0.9996283836364722, "Yes": 0.0003711967175123006}, "ground_truth": 0}, {"key": "38829733", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999471197682087, "res": {"Yes": 0.9999471197682087, "No": 5.2783152479349404e-05}, "ground_truth": 0}, {"key": "38829733", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.477798687638606e-08}, "ground_truth": 1}, {"key": "38829733", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999590395729012, "res": {"Yes": 0.9999590395729012, "No": 4.093136165978561e-05}, "ground_truth": 0}, {"key": "38829733", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998659569845679, "res": {"Yes": 0.9998659569845679, "No": 0.0001339170472603509}, "ground_truth": 0}, {"key": "24624736", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9958158144991189, "res": {"Yes": 0.9958158144991189, "No": 0.0041841470794715206}, "ground_truth": 0}, {"key": "24624736", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999942038320978, "res": {"Yes": 0.9999942038320978, "No": 5.698930684119478e-06}, "ground_truth": 0}, {"key": "24624736", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992074785511471, "res": {"Yes": 0.9992074785511471, "No": 0.0007924627947520332}, "ground_truth": 1}, {"key": "24624736", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 6.510462888808019e-06}, "ground_truth": 0}, {"key": "24624736", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999279291732268, "res": {"Yes": 0.9999279291732268, "No": 7.197526785301628e-05}, "ground_truth": 0}, {"key": "36928562", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7945200399304961, "res": {"Yes": 0.7945200399304961, "No": 0.20547904932495245}, "ground_truth": 0}, {"key": "36928562", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.004650992692076305, "res": {"No": 0.9953488506522961, "Yes": 0.004650992692076305}, "ground_truth": 0}, {"key": "36928562", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0004686686230081012, "res": {"No": 0.9995311873270193, "Yes": 0.0004686686230081012}, "ground_truth": 1}, {"key": "36928562", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.003292235703020412, "res": {"No": 0.9967074659176798, "Yes": 0.003292235703020412}, "ground_truth": 0}, {"key": "36928562", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 1.4233632512488271e-06, "res": {"No": 0.9999983759447187, "Yes": 1.4233632512488271e-06}, "ground_truth": 0}, {"key": "34941119", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7944035797739984, "res": {"Yes": 0.7944035797739984, "No": 0.2055963291272428}, "ground_truth": 0}, {"key": "34941119", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999913429644723, "res": {"Yes": 0.9999913429644723, "No": 8.593160171860759e-06}, "ground_truth": 0}, {"key": "34941119", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999791843696483, "res": {"Yes": 0.9999791843696483, "No": 2.0739779227253438e-05}, "ground_truth": 1}, {"key": "34941119", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.918183125494932e-06}, "ground_truth": 0}, {"key": "34941119", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997944542874151, "res": {"Yes": 0.9997944542874151, "No": 0.00020544072673761777}, "ground_truth": 0}, {"key": "30206231", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9995662063856687, "res": {"Yes": 0.9995662063856687, "No": 0.0004336496100767569}, "ground_truth": 0}, {"key": "30206231", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999864556687252, "res": {"Yes": 0.9999864556687252, "No": 1.3508046676177033e-05}, "ground_truth": 0}, {"key": "30206231", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.838195769796536e-06}, "ground_truth": 1}, {"key": "30206231", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999785883642324, "res": {"Yes": 0.9999785883642324, "No": 2.1374968859671187e-05}, "ground_truth": 0}, {"key": "30206231", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999907469518097, "res": {"Yes": 0.9999907469518097, "No": 9.210384135900567e-06}, "ground_truth": 0}, {"key": "35584972", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998806098475715, "res": {"Yes": 0.9998806098475715, "No": 0.00011933427132466741}, "ground_truth": 0}, {"key": "35584972", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9689232747992407, "res": {"Yes": 0.9689232747992407, "No": 0.031076555081174174}, "ground_truth": 0}, {"key": "35584972", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998186442669394, "res": {"Yes": 0.9998186442669394, "No": 0.00018127071157486845}, "ground_truth": 1}, {"key": "35584972", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997527570895672, "res": {"Yes": 0.9997527570895672, "No": 0.0002471087586072389}, "ground_truth": 0}, {"key": "35584972", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9955972877789242, "res": {"Yes": 0.9955972877789242, "No": 0.004402654607921916}, "ground_truth": 0}, {"key": "39277709", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9240336631717719, "res": {"Yes": 0.9240336631717719, "No": 0.07596629681614792}, "ground_truth": 0}, {"key": "39277709", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.996046507311323, "res": {"Yes": 0.996046507311323, "No": 0.003953494530151875}, "ground_truth": 0}, {"key": "39277709", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6596578747193764, "res": {"Yes": 0.6596578747193764, "No": 0.3403421308886318}, "ground_truth": 1}, {"key": "39277709", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993828244812086, "res": {"Yes": 0.9993828244812086, "No": 0.0006170669691484841}, "ground_truth": 0}, {"key": "39277709", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.08003328246600672, "res": {"No": 0.9199664564354477, "Yes": 0.08003328246600672}, "ground_truth": 0}, {"key": "36123657", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5742986050912503, "res": {"Yes": 0.5742986050912503, "No": 0.42570121160694074}, "ground_truth": 0}, {"key": "36123657", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983291903591257, "res": {"Yes": 0.9983291903591257, "No": 0.0016707555996501309}, "ground_truth": 0}, {"key": "36123657", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983662620052377, "res": {"Yes": 0.9983662620052377, "No": 0.0016333527089199087}, "ground_truth": 1}, {"key": "36123657", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999012298380936, "res": {"Yes": 0.9999012298380936, "No": 9.870627865719691e-05}, "ground_truth": 0}, {"key": "36123657", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.05447662327848566, "res": {"No": 0.9455223432644618, "Yes": 0.05447662327848566}, "ground_truth": 0}, {"key": "33363938", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.688484433146834e-07, "res": {"No": 0.9999995679800934, "Yes": 3.688484433146834e-07}, "ground_truth": 0}, {"key": "33363938", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.00014806947036102004, "res": {"No": 0.9998515353819624, "Yes": 0.00014806947036102004}, "ground_truth": 0}, {"key": "33363938", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.00026066486965955804, "res": {"No": 0.9997391750207507, "Yes": 0.00026066486965955804}, "ground_truth": 1}, {"key": "33363938", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.015721391155494674, "res": {"No": 0.9842784680302378, "Yes": 0.015721391155494674}, "ground_truth": 0}, {"key": "33363938", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.00029432381063508226, "res": {"No": 0.9997055762511812, "Yes": 0.00029432381063508226}, "ground_truth": 0}, {"key": "37349129", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00016659030886525115, "res": {"No": 0.9998327078448934, "Yes": 0.00016659030886525115}, "ground_truth": 0}, {"key": "37349129", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9877200140350906, "res": {"Yes": 0.9877200140350906, "No": 0.012279804212472167}, "ground_truth": 0}, {"key": "37349129", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8821151817044761, "res": {"Yes": 0.8821151817044761, "No": 0.11788429339894306}, "ground_truth": 1}, {"key": "37349129", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.023064934540985214, "res": {"No": 0.9769335241913836, "Yes": 0.023064934540985214}, "ground_truth": 0}, {"key": "37349129", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9403639009150603, "res": {"Yes": 0.9403639009150603, "No": 0.05963602745737396}, "ground_truth": 0}, {"key": "37160199", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9655063212588428, "res": {"Yes": 0.9655063212588428, "No": 0.034493606188145405}, "ground_truth": 0}, {"key": "37160199", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999704827216435, "res": {"Yes": 0.9999704827216435, "No": 2.930381047663424e-05}, "ground_truth": 0}, {"key": "37160199", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950625322658112, "res": {"Yes": 0.9950625322658112, "No": 0.0049374266725218064}, "ground_truth": 1}, {"key": "37160199", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999008722634788, "res": {"Yes": 0.9999008722634788, "No": 9.898868572171783e-05}, "ground_truth": 0}, {"key": "37160199", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.99886305011783, "res": {"Yes": 0.99886305011783, "No": 0.0011369170750351553}, "ground_truth": 0}, {"key": "35891053", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0001887925055916917, "res": {"No": 0.9998111395063888, "Yes": 0.0001887925055916917}, "ground_truth": 0}, {"key": "35891053", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974065460276136, "res": {"Yes": 0.9974065460276136, "No": 0.0025934246966754595}, "ground_truth": 0}, {"key": "35891053", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999354384941838, "res": {"Yes": 0.9999354384941838, "No": 6.452608482662219e-05}, "ground_truth": 1}, {"key": "35891053", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994867755047696, "res": {"Yes": 0.9994867755047696, "No": 0.0005131990891851454}, "ground_truth": 0}, {"key": "35891053", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.3975719984587505e-06}, "ground_truth": 0}, {"key": "40694542", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9939392611256251, "res": {"Yes": 0.9939392611256251, "No": 0.006060521660591203}, "ground_truth": 0}, {"key": "40694542", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953768433411765, "res": {"Yes": 0.9953768433411765, "No": 0.004622997383272012}, "ground_truth": 0}, {"key": "40694542", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990009168162707, "res": {"Yes": 0.9990009168162707, "No": 0.000999037742249338}, "ground_truth": 1}, {"key": "40694542", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999400871468467, "res": {"Yes": 0.9999400871468467, "No": 5.983624361990142e-05}, "ground_truth": 0}, {"key": "40694542", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999647611309035, "res": {"Yes": 0.9999647611309035, "No": 3.513562447115163e-05}, "ground_truth": 0}, {"key": "24645770", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9983831394297155, "res": {"Yes": 0.9983831394297155, "No": 0.0016168895600637053}, "ground_truth": 0}, {"key": "24645770", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999663107274963, "res": {"Yes": 0.9999663107274963, "No": 3.366152716196911e-05}, "ground_truth": 0}, {"key": "24645770", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999831180165023, "res": {"Yes": 0.9999831180165023, "No": 1.677177741449138e-05}, "ground_truth": 1}, {"key": "24645770", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998941975374753, "res": {"Yes": 0.9998941975374753, "No": 0.0001056895008212865}, "ground_truth": 0}, {"key": "24645770", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996438634816706, "res": {"Yes": 0.9996438634816706, "No": 0.0003560163148257864}, "ground_truth": 0}, {"key": "37974587", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9991727285079002, "res": {"Yes": 0.9991727285079002, "No": 0.0008272050080577627}, "ground_truth": 0}, {"key": "37974587", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999737011318213, "res": {"Yes": 0.9999737011318213, "No": 2.6234505758915162e-05}, "ground_truth": 0}, {"key": "37974587", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992793680799934, "res": {"Yes": 0.9992793680799934, "No": 0.0007206099152722078}, "ground_truth": 1}, {"key": "37974587", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9729842694377964, "res": {"Yes": 0.9729842694377964, "No": 0.02701558924912644}, "ground_truth": 0}, {"key": "37974587", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 3.31664111338035e-06, "res": {"No": 0.9999965878943212, "Yes": 3.31664111338035e-06}, "ground_truth": 0}, {"key": "40354149", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.007503530018773972, "res": {"No": 0.9924960750878963, "Yes": 0.007503530018773972}, "ground_truth": 0}, {"key": "40354149", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.09446687830942545, "res": {"No": 0.9055317579438233, "Yes": 0.09446687830942545}, "ground_truth": 0}, {"key": "40354149", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.03897658881077557, "res": {"No": 0.9610221099927241, "Yes": 0.03897658881077557}, "ground_truth": 1}, {"key": "40354149", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.0041689002790606805, "res": {"No": 0.9958310656346856, "Yes": 0.0041689002790606805}, "ground_truth": 0}, {"key": "40354149", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.036161480545756235, "res": {"No": 0.9638378412424693, "Yes": 0.036161480545756235}, "ground_truth": 0}, {"key": "35519470", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9991598762717193, "res": {"Yes": 0.9991598762717193, "No": 0.0008400190397623862}, "ground_truth": 0}, {"key": "35519470", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 5.236145091133651e-06}, "ground_truth": 0}, {"key": "35519470", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.4164780694575948e-06}, "ground_truth": 1}, {"key": "35519470", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.602645668344985e-07}, "ground_truth": 0}, {"key": "35519470", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.4169201340471994e-07}, "ground_truth": 0}, {"key": "36185624", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.1890212205833864e-05, "res": {"No": 0.999988005296937, "Yes": 1.1890212205833864e-05}, "ground_truth": 0}, {"key": "36185624", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.936442651524268, "res": {"Yes": 0.936442651524268, "No": 0.06355724392740959}, "ground_truth": 0}, {"key": "36185624", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9353666383289019, "res": {"Yes": 0.9353666383289019, "No": 0.06463317209932713}, "ground_truth": 1}, {"key": "36185624", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990040093282954, "res": {"Yes": 0.9990040093282954, "No": 0.0009959659377606186}, "ground_truth": 0}, {"key": "36185624", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.99934972885798, "res": {"Yes": 0.99934972885798, "No": 0.0006501867036073795}, "ground_truth": 0}, {"key": "39306113", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.21477107189954225, "res": {"No": 0.7852283749551672, "Yes": 0.21477107189954225}, "ground_truth": 0}, {"key": "39306113", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.985830766027247, "res": {"Yes": 0.985830766027247, "No": 0.014169167186685615}, "ground_truth": 0}, {"key": "39306113", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.994465861477581, "res": {"Yes": 0.994465861477581, "No": 0.005534079676988284}, "ground_truth": 1}, {"key": "39306113", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999769195503577, "res": {"Yes": 0.9999769195503577, "No": 2.2945860685787626e-05}, "ground_truth": 0}, {"key": "39306113", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9844718337623823, "res": {"Yes": 0.9844718337623823, "No": 0.015528012936621753}, "ground_truth": 0}, {"key": "19347718", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 9.407630835460764e-07, "res": {"No": 0.9999989719621284, "Yes": 9.407630835460764e-07}, "ground_truth": 0}, {"key": "19347718", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999760851449647, "res": {"Yes": 0.9999760851449647, "No": 2.3856066795507297e-05}, "ground_truth": 0}, {"key": "19347718", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999440206399028, "res": {"Yes": 0.9999440206399028, "No": 5.587935870107954e-05}, "ground_truth": 1}, {"key": "19347718", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.010005670466983198, "res": {"No": 0.9899942509416593, "Yes": 0.010005670466983198}, "ground_truth": 0}, {"key": "19347718", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998420042226477, "res": {"Yes": 0.9998420042226477, "No": 0.00015797638470508747}, "ground_truth": 0}, {"key": "21870064", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992762710582184, "res": {"Yes": 0.9992762710582184, "No": 0.0007236269216358512}, "ground_truth": 0}, {"key": "21870064", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.45478999658748e-06}, "ground_truth": 0}, {"key": "21870064", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 8.038363823685356e-06}, "ground_truth": 1}, {"key": "21870064", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.346468389851785e-07}, "ground_truth": 0}, {"key": "21870064", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.1378353606699448e-07}, "ground_truth": 0}, {"key": "37675776", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9984415992694788, "res": {"Yes": 0.9984415992694788, "No": 0.001558364352126167}, "ground_truth": 0}, {"key": "37675776", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.996968426128403, "res": {"Yes": 0.996968426128403, "No": 0.00303129123521836}, "ground_truth": 0}, {"key": "37675776", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998582098131789, "res": {"Yes": 0.9998582098131789, "No": 0.00014156763064171473}, "ground_truth": 1}, {"key": "37675776", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998006516225288, "res": {"Yes": 0.9998006516225288, "No": 0.0001993006638186888}, "ground_truth": 0}, {"key": "37675776", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969017210032685, "res": {"Yes": 0.9969017210032685, "No": 0.003098248274334999}, "ground_truth": 0}, {"key": "38107017", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9379587101262031, "res": {"Yes": 0.9379587101262031, "No": 0.062041156478181224}, "ground_truth": 0}, {"key": "38107017", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999785883642324, "res": {"Yes": 0.9999785883642324, "No": 2.1341564537363882e-05}, "ground_truth": 0}, {"key": "38107017", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 5.09133745816114e-06}, "ground_truth": 1}, {"key": "38107017", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.627767320195737e-06}, "ground_truth": 0}, {"key": "38107017", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999609467545978, "res": {"Yes": 0.9999609467545978, "No": 3.900955273129118e-05}, "ground_truth": 0}, {"key": "40046472", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9974283643349613, "res": {"Yes": 0.9974283643349613, "No": 0.0025716411605791432}, "ground_truth": 0}, {"key": "40046472", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.040756193387761e-06}, "ground_truth": 0}, {"key": "40046472", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.410486484742195e-07}, "ground_truth": 1}, {"key": "40046472", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.538126898014344e-07}, "ground_truth": 0}, {"key": "40046472", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992987697769106, "res": {"Yes": 0.9992987697769106, "No": 0.0007009885534151649}, "ground_truth": 0}, {"key": "32157820", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.003054551822703123, "res": {"No": 0.9969454441774391, "Yes": 0.003054551822703123}, "ground_truth": 0}, {"key": "32157820", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8369897371812768, "res": {"Yes": 0.8369897371812768, "No": 0.16300979303675703}, "ground_truth": 0}, {"key": "32157820", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9829159923156504, "res": {"Yes": 0.9829159923156504, "No": 0.01708396456787058}, "ground_truth": 1}, {"key": "32157820", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5380730754877506, "res": {"Yes": 0.5380730754877506, "No": 0.4619268378835559}, "ground_truth": 0}, {"key": "32157820", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.000431201094546312, "res": {"No": 0.999568589424315, "Yes": 0.000431201094546312}, "ground_truth": 0}, {"key": "41004037", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0001872514859408065, "res": {"No": 0.9998124504896088, "Yes": 0.0001872514859408065}, "ground_truth": 0}, {"key": "41004037", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999397295583361, "res": {"Yes": 0.9999397295583361, "No": 6.008237624153481e-05}, "ground_truth": 0}, {"key": "41004037", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.2034718140270358e-06}, "ground_truth": 1}, {"key": "41004037", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.857964317353664e-07}, "ground_truth": 0}, {"key": "41004037", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999797803764193, "res": {"Yes": 0.9999797803764193, "No": 2.0117670495048745e-05}, "ground_truth": 0}, {"key": "21387993", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999048055962748, "res": {"Yes": 0.9999048055962748, "No": 9.507561838406763e-05}, "ground_truth": 0}, {"key": "21387993", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.336668868670816e-07}, "ground_truth": 0}, {"key": "21387993", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.108319642864835e-07}, "ground_truth": 1}, {"key": "21387993", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 7.943043847969402e-06}, "ground_truth": 0}, {"key": "21387993", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.9078606847999663e-07}, "ground_truth": 0}, {"key": "34665539", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.022886668488919182, "res": {"No": 0.9771127434524105, "Yes": 0.022886668488919182}, "ground_truth": 0}, {"key": "34665539", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.984514233908357, "res": {"Yes": 0.984514233908357, "No": 0.01548567307877746}, "ground_truth": 0}, {"key": "34665539", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999557020111849, "res": {"Yes": 0.9999557020111849, "No": 4.427244869543889e-05}, "ground_truth": 1}, {"key": "34665539", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9865538688291816, "res": {"Yes": 0.9865538688291816, "No": 0.013446053984928312}, "ground_truth": 0}, {"key": "34665539", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988567469123265, "res": {"Yes": 0.9988567469123265, "No": 0.0011432071236999334}, "ground_truth": 0}, {"key": "37872111", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.09818740799504642, "res": {"No": 0.9018097331755541, "Yes": 0.09818740799504642}, "ground_truth": 0}, {"key": "37872111", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.3173824072880102, "res": {"No": 0.6826174696705508, "Yes": 0.3173824072880102}, "ground_truth": 0}, {"key": "37872111", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0031679455268032388, "res": {"No": 0.9968320719069753, "Yes": 0.0031679455268032388}, "ground_truth": 1}, {"key": "37872111", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2394547776003997, "res": {"No": 0.7605445949610123, "Yes": 0.2394547776003997}, "ground_truth": 0}, {"key": "37872111", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.009163760038219886, "res": {"No": 0.9908359640838537, "Yes": 0.009163760038219886}, "ground_truth": 0}, {"key": "36629542", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6418966946499914, "res": {"Yes": 0.6418966946499914, "No": 0.3581030087608721}, "ground_truth": 0}, {"key": "36629542", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999985978860297, "res": {"Yes": 0.999985978860297, "No": 1.3933263765790305e-05}, "ground_truth": 0}, {"key": "36629542", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9659837469641829, "res": {"Yes": 0.9659837469641829, "No": 0.03401621351306765}, "ground_truth": 1}, {"key": "36629542", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999803763825457, "res": {"Yes": 0.9999803763825457, "No": 1.9549281971654293e-05}, "ground_truth": 0}, {"key": "36629542", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9497357556750224, "res": {"Yes": 0.9497357556750224, "No": 0.05026412384469185}, "ground_truth": 0}, {"key": "36487527", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999801379802525, "res": {"Yes": 0.9999801379802525, "No": 1.9817325056541288e-05}, "ground_truth": 0}, {"key": "36487527", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999146986083706, "res": {"Yes": 0.9999146986083706, "No": 8.516959960069833e-05}, "ground_truth": 0}, {"key": "36487527", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999883629027115, "res": {"Yes": 0.9999883629027115, "No": 1.1533900378631557e-05}, "ground_truth": 1}, {"key": "36487527", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999852727245697, "res": {"Yes": 0.999852727245697, "No": 0.00014713735789844882}, "ground_truth": 0}, {"key": "36487527", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999933054575945, "res": {"Yes": 0.999933054575945, "No": 6.684092602362122e-05}, "ground_truth": 0}, {"key": "37344756", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9717548036148702, "res": {"Yes": 0.9717548036148702, "No": 0.028244839800602033}, "ground_truth": 0}, {"key": "37344756", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8089511637003207, "res": {"Yes": 0.8089511637003207, "No": 0.19104862541724205}, "ground_truth": 0}, {"key": "37344756", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999931147444446, "res": {"Yes": 0.999931147444446, "No": 6.873637657243995e-05}, "ground_truth": 1}, {"key": "37344756", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9010178412144327, "res": {"Yes": 0.9010178412144327, "No": 0.09898194690213934}, "ground_truth": 0}, {"key": "37344756", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.5337447556327423, "res": {"Yes": 0.5337447556327423, "No": 0.4662548993919835}, "ground_truth": 0}, {"key": "38707722", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.2750380299911386, "res": {"No": 0.7249605205673634, "Yes": 0.2750380299911386}, "ground_truth": 0}, {"key": "38707722", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998909793831794, "res": {"Yes": 0.9998909793831794, "No": 0.00010887827966833237}, "ground_truth": 0}, {"key": "38707722", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998840663508249, "res": {"Yes": 0.9998840663508249, "No": 0.00011588746118130655}, "ground_truth": 1}, {"key": "38707722", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947448801096306, "res": {"Yes": 0.9947448801096306, "No": 0.005255064503698565}, "ground_truth": 0}, {"key": "38707722", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972298769910841, "res": {"Yes": 0.9972298769910841, "No": 0.0027696065737706364}, "ground_truth": 0}, {"key": "37093419", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992481890116072, "res": {"Yes": 0.9992481890116072, "No": 0.0007517404047961304}, "ground_truth": 0}, {"key": "37093419", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999654763299556, "res": {"Yes": 0.9999654763299556, "No": 3.438359489230314e-05}, "ground_truth": 1}, {"key": "37093419", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8935141481329149, "res": {"Yes": 0.8935141481329149, "No": 0.10648568670509709}, "ground_truth": 0}, {"key": "37093419", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9928835142661292, "res": {"Yes": 0.9928835142661292, "No": 0.007116302501744054}, "ground_truth": 0}, {"key": "35547391", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.7006015863854826e-06, "res": {"No": 0.999996945503965, "Yes": 2.7006015863854826e-06}, "ground_truth": 0}, {"key": "35547391", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.011540649926892287, "res": {"No": 0.9884589478261608, "Yes": 0.011540649926892287}, "ground_truth": 0}, {"key": "35547391", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 7.841887500213157e-06}, "ground_truth": 1}, {"key": "35547391", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993728249265579, "res": {"Yes": 0.9993728249265579, "No": 0.0006271200928663243}, "ground_truth": 0}, {"key": "35547391", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9850993558635187, "res": {"Yes": 0.9850993558635187, "No": 0.014900596995817326}, "ground_truth": 0}, {"key": "37173168", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.2822241342825752e-05, "res": {"No": 0.9999769195503577, "Yes": 2.2822241342825752e-05}, "ground_truth": 0}, {"key": "37173168", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.051968074545183346, "res": {"No": 0.9480287122521032, "Yes": 0.051968074545183346}, "ground_truth": 0}, {"key": "37173168", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.009947231576931214, "res": {"No": 0.9900525593601122, "Yes": 0.009947231576931214}, "ground_truth": 1}, {"key": "37173168", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9480462793869001, "res": {"Yes": 0.9480462793869001, "No": 0.05195179549767431}, "ground_truth": 0}, {"key": "37173168", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.015130475863864937, "res": {"No": 0.9848678226671413, "Yes": 0.015130475863864937}, "ground_truth": 0}, {"key": "30725298", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.827883508623453e-08}, "ground_truth": 0}, {"key": "30725298", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 1.4102736223287484e-08}, "ground_truth": 0}, {"key": "30725298", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 3.3780909557890195e-08}, "ground_truth": 1}, {"key": "30725298", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 9.682612429442979e-08}, "ground_truth": 0}, {"key": "30725298", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 3.2696983859820686e-08}, "ground_truth": 0}, {"key": "33830573", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.003746884441194163, "res": {"No": 0.9962529132163053, "Yes": 0.003746884441194163}, "ground_truth": 0}, {"key": "33830573", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.5471661910195954, "res": {"Yes": 0.5471661910195954, "No": 0.4528334867459768}, "ground_truth": 0}, {"key": "33830573", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.002234557627583664, "res": {"No": 0.9977654107202606, "Yes": 0.002234557627583664}, "ground_truth": 1}, {"key": "33830573", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986021674792586, "res": {"Yes": 0.9986021674792586, "No": 0.0013977939991473975}, "ground_truth": 0}, {"key": "33830573", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.28244427626830443, "res": {"No": 0.7175556524256597, "Yes": 0.28244427626830443}, "ground_truth": 0}, {"key": "33415474", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9426159041456076, "res": {"Yes": 0.9426159041456076, "No": 0.057383808346826944}, "ground_truth": 0}, {"key": "33415474", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997453683241494, "res": {"Yes": 0.9997453683241494, "No": 0.0002545796001880209}, "ground_truth": 0}, {"key": "33415474", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999925349918634, "res": {"Yes": 0.9999925349918634, "No": 7.368539792167102e-06}, "ground_truth": 1}, {"key": "33415474", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9670347435115288, "res": {"Yes": 0.9670347435115288, "No": 0.032965128115981214}, "ground_truth": 0}, {"key": "33415474", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9882512064616158, "res": {"Yes": 0.9882512064616158, "No": 0.011748527093430529}, "ground_truth": 0}, {"key": "37383994", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997208262562438, "res": {"Yes": 0.9997208262562438, "No": 0.00027905713839396174}, "ground_truth": 0}, {"key": "37383994", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8842146173526966, "res": {"Yes": 0.8842146173526966, "No": 0.11578522646551674}, "ground_truth": 0}, {"key": "37383994", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986970426869437, "res": {"Yes": 0.9986970426869437, "No": 0.0013029022537002443}, "ground_truth": 1}, {"key": "37383994", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991740386939829, "res": {"Yes": 0.9991740386939829, "No": 0.0008259353964766955}, "ground_truth": 0}, {"key": "37383994", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999180360292298, "res": {"Yes": 0.9999180360292298, "No": 8.190021024618983e-05}, "ground_truth": 0}, {"key": "38576819", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9864527137682837, "res": {"Yes": 0.9864527137682837, "No": 0.013547178257794576}, "ground_truth": 0}, {"key": "38576819", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 3.1846123101717943e-06}, "ground_truth": 0}, {"key": "38576819", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999763235462916, "res": {"Yes": 0.9999763235462916, "No": 2.3615223271816867e-05}, "ground_truth": 1}, {"key": "38576819", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999498613089862, "res": {"Yes": 0.9999498613089862, "No": 4.998677937496379e-05}, "ground_truth": 0}, {"key": "38576819", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999877668918251, "res": {"Yes": 0.9999877668918251, "No": 1.2099916575162693e-05}, "ground_truth": 0}, {"key": "34500226", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999889589149532, "res": {"Yes": 0.9999889589149532, "No": 1.0931205162066597e-05}, "ground_truth": 0}, {"key": "34500226", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999768003491161, "res": {"Yes": 0.9999768003491161, "No": 2.3081849158839515e-05}, "ground_truth": 0}, {"key": "34500226", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999870516788303, "res": {"Yes": 0.9999870516788303, "No": 1.2819206549522812e-05}, "ground_truth": 1}, {"key": "34500226", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999882437011058, "res": {"Yes": 0.9999882437011058, "No": 1.1636114235027965e-05}, "ground_truth": 0}, {"key": "34500226", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999931310055916, "res": {"Yes": 0.9999931310055916, "No": 6.846534131079701e-06}, "ground_truth": 0}, {"key": "39856394", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967576821021339, "res": {"Yes": 0.9967576821021339, "No": 0.0032422469060793017}, "ground_truth": 0}, {"key": "39856394", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9849051725177851, "res": {"Yes": 0.9849051725177851, "No": 0.015094784381893201}, "ground_truth": 1}, {"key": "39856394", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999629731405111, "res": {"Yes": 0.9999629731405111, "No": 3.693089848458531e-05}, "ground_truth": 0}, {"key": "39856394", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9892349305891784, "res": {"Yes": 0.9892349305891784, "No": 0.010764980065023414}, "ground_truth": 0}, {"key": "35499522", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999621949079265, "res": {"Yes": 0.999621949079265, "No": 0.0003780132159973214}, "ground_truth": 0}, {"key": "35499522", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 4.556868057085448e-06}, "ground_truth": 1}, {"key": "35499522", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999727475263555, "res": {"Yes": 0.9999727475263555, "No": 2.7218926484538254e-05}, "ground_truth": 0}, {"key": "35499522", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999396103605277, "res": {"Yes": 0.9999396103605277, "No": 6.0369050094937426e-05}, "ground_truth": 0}, {"key": "30157766", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9960378730199935, "res": {"Yes": 0.9960378730199935, "No": 0.003962050835582419}, "ground_truth": 0}, {"key": "30157766", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974482880657827, "res": {"Yes": 0.9974482880657827, "No": 0.002551747143778488}, "ground_truth": 0}, {"key": "30157766", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997435807210443, "res": {"Yes": 0.9997435807210443, "No": 0.00025637431844635806}, "ground_truth": 1}, {"key": "30157766", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999448788113121, "res": {"Yes": 0.999448788113121, "No": 0.0005510951928170454}, "ground_truth": 0}, {"key": "30157766", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9743395169974556, "res": {"Yes": 0.9743395169974556, "No": 0.025660413637264495}, "ground_truth": 0}, {"key": "40472346", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.992894101141619, "res": {"Yes": 0.992894101141619, "No": 0.00710591033267548}, "ground_truth": 0}, {"key": "40472346", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9921095542927081, "res": {"Yes": 0.9921095542927081, "No": 0.0078904019593798}, "ground_truth": 0}, {"key": "40472346", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5360244142364802, "res": {"Yes": 0.5360244142364802, "No": 0.46397542106574785}, "ground_truth": 1}, {"key": "40472346", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8013952964209696, "res": {"Yes": 0.8013952964209696, "No": 0.19860455342809233}, "ground_truth": 0}, {"key": "40472346", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7934674584512218, "res": {"Yes": 0.7934674584512218, "No": 0.20653245744953314}, "ground_truth": 0}, {"key": "35305635", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.629989468118845e-05, "res": {"No": 0.9999236381607137, "Yes": 7.629989468118845e-05}, "ground_truth": 0}, {"key": "35305635", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.014137284228616917, "res": {"No": 0.985862512260718, "Yes": 0.014137284228616917}, "ground_truth": 0}, {"key": "35305635", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9049620589257409, "res": {"Yes": 0.9049620589257409, "No": 0.09503761674675802}, "ground_truth": 1}, {"key": "35305635", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8142707594265308, "res": {"Yes": 0.8142707594265308, "No": 0.18572897343280256}, "ground_truth": 0}, {"key": "35305635", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 7.005611459379993e-05, "res": {"No": 0.9999298363015874, "Yes": 7.005611459379993e-05}, "ground_truth": 0}, {"key": "32495926", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0003479020094088418, "res": {"No": 0.9996519627790098, "Yes": 0.0003479020094088418}, "ground_truth": 0}, {"key": "32495926", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999983237218497, "res": {"Yes": 0.999983237218497, "No": 1.6663297693949166e-05}, "ground_truth": 0}, {"key": "32495926", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999359152835132, "res": {"Yes": 0.9999359152835132, "No": 6.39892792727166e-05}, "ground_truth": 1}, {"key": "32495926", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999889589149532, "res": {"Yes": 0.9999889589149532, "No": 1.0973865232727389e-05}, "ground_truth": 0}, {"key": "32495926", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999925349918634, "res": {"Yes": 0.9999925349918634, "No": 7.205045597078926e-06}, "ground_truth": 0}, {"key": "37353801", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7387692005554389, "res": {"Yes": 0.7387692005554389, "No": 0.2612268218056888}, "ground_truth": 0}, {"key": "37353801", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9199830012609709, "res": {"Yes": 0.9199830012609709, "No": 0.08001672529276316}, "ground_truth": 0}, {"key": "37353801", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998814441782969, "res": {"Yes": 0.9998814441782969, "No": 0.00011845812918966962}, "ground_truth": 1}, {"key": "37353801", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994878477947371, "res": {"Yes": 0.9994878477947371, "No": 0.0005121241919171859}, "ground_truth": 0}, {"key": "37353801", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999801379802525, "res": {"Yes": 0.9999801379802525, "No": 1.9748737356051557e-05}, "ground_truth": 0}, {"key": "30159904", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 9.928412120448725e-06, "res": {"No": 0.9999897933310884, "Yes": 9.928412120448725e-06}, "ground_truth": 0}, {"key": "30159904", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9617757707728721, "res": {"Yes": 0.9617757707728721, "No": 0.03822409893907932}, "ground_truth": 0}, {"key": "30159904", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999437822452772, "res": {"Yes": 0.9999437822452772, "No": 5.610127603345041e-05}, "ground_truth": 1}, {"key": "30159904", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995946731171448, "res": {"Yes": 0.9995946731171448, "No": 0.0004052704240401294}, "ground_truth": 0}, {"key": "30159904", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999613254165458, "res": {"Yes": 0.999613254165458, "No": 0.00038667318690067015}, "ground_truth": 0}, {"key": "33698679", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992966330385935, "res": {"Yes": 0.9992966330385935, "No": 0.000703287632332302}, "ground_truth": 0}, {"key": "33698679", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996950894546717, "res": {"Yes": 0.9996950894546717, "No": 0.0003048054054173937}, "ground_truth": 0}, {"key": "33698679", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998151916591657, "res": {"Yes": 0.9998151916591657, "No": 0.00018471416163208675}, "ground_truth": 1}, {"key": "33698679", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999634499379698, "res": {"Yes": 0.9999634499379698, "No": 3.642500749928925e-05}, "ground_truth": 0}, {"key": "33698679", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997390558518481, "res": {"Yes": 0.9997390558518481, "No": 0.0002609064875735121}, "ground_truth": 0}, {"key": "40530172", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.04038399644876807, "res": {"No": 0.959614918073485, "Yes": 0.04038399644876807}, "ground_truth": 0}, {"key": "40530172", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6868058927409949, "res": {"Yes": 0.6868058927409949, "No": 0.313194053912315}, "ground_truth": 0}, {"key": "40530172", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.000955816317385046, "res": {"No": 0.9990440941293806, "Yes": 0.000955816317385046}, "ground_truth": 1}, {"key": "40530172", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.006441885514264425, "res": {"No": 0.9935580757366915, "Yes": 0.006441885514264425}, "ground_truth": 0}, {"key": "40530172", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9695464753562173, "res": {"Yes": 0.9695464753562173, "No": 0.030453349222137164}, "ground_truth": 0}, {"key": "40652941", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.80352429990084e-05, "res": {"No": 0.9999206582977194, "Yes": 7.80352429990084e-05}, "ground_truth": 0}, {"key": "40652941", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999850252451228, "res": {"Yes": 0.9999850252451228, "No": 1.4924894013259032e-05}, "ground_truth": 0}, {"key": "40652941", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998114970389163, "res": {"Yes": 0.9998114970389163, "No": 0.00018821078996830165}, "ground_truth": 1}, {"key": "40652941", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995126041159824, "res": {"Yes": 0.9995126041159824, "No": 0.000487334749683301}, "ground_truth": 0}, {"key": "40652941", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999753699393249, "res": {"Yes": 0.9999753699393249, "No": 2.45083912267986e-05}, "ground_truth": 0}, {"key": "40122246", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3947009458260068, "res": {"No": 0.6052984420109502, "Yes": 0.3947009458260068}, "ground_truth": 0}, {"key": "40122246", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9929837744499693, "res": {"Yes": 0.9929837744499693, "No": 0.007016098392458305}, "ground_truth": 0}, {"key": "40122246", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9942578157249139, "res": {"Yes": 0.9942578157249139, "No": 0.005742102767120133}, "ground_truth": 1}, {"key": "40122246", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998616662389972, "res": {"Yes": 0.9998616662389972, "No": 0.00013819539064123266}, "ground_truth": 0}, {"key": "40122246", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9764618085286311, "res": {"Yes": 0.9764618085286311, "No": 0.023538102214614606}, "ground_truth": 0}, {"key": "40032656", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.06601144123064236, "res": {"No": 0.9339878587824905, "Yes": 0.06601144123064236}, "ground_truth": 0}, {"key": "40032656", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999726900318509, "res": {"Yes": 0.999726900318509, "No": 0.0002729805529314384}, "ground_truth": 0}, {"key": "40032656", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997780113531267, "res": {"Yes": 0.9997780113531267, "No": 0.00022188773548785347}, "ground_truth": 1}, {"key": "40032656", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992984198225426, "res": {"Yes": 0.9992984198225426, "No": 0.0007014829245906857}, "ground_truth": 0}, {"key": "40032656", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997767004150644, "res": {"Yes": 0.9997767004150644, "No": 0.00022319493954579565}, "ground_truth": 0}, {"key": "38913680", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9804930290087749, "res": {"Yes": 0.9804930290087749, "No": 0.01949918712848554}, "ground_truth": 0}, {"key": "38913680", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.02223334922413974, "res": {"No": 0.9777614821244728, "Yes": 0.02223334922413974}, "ground_truth": 0}, {"key": "38913680", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3204525383047257, "res": {"No": 0.6795194987979043, "Yes": 0.3204525383047257}, "ground_truth": 1}, {"key": "38913680", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.0010252456874919008, "res": {"No": 0.9989716480166774, "Yes": 0.0010252456874919008}, "ground_truth": 0}, {"key": "38913680", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 7.184220077813756e-05, "res": {"No": 0.9999273332003598, "Yes": 7.184220077813756e-05}, "ground_truth": 0}, {"key": "17608039", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.007643619740364359, "res": {"No": 0.9923562491888654, "Yes": 0.007643619740364359}, "ground_truth": 0}, {"key": "17608039", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976396304634264, "res": {"Yes": 0.9976396304634264, "No": 0.0023603161666456594}, "ground_truth": 0}, {"key": "17608039", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937639444113443, "res": {"Yes": 0.9937639444113443, "No": 0.006236055673925801}, "ground_truth": 1}, {"key": "17608039", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9900367879484612, "res": {"Yes": 0.9900367879484612, "No": 0.00996313597514905}, "ground_truth": 0}, {"key": "17608039", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9603111811627759, "res": {"Yes": 0.9603111811627759, "No": 0.03968852404315247}, "ground_truth": 0}, {"key": "40434901", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 1.1207584582255907e-07, "res": {"No": 0.9999998063873687, "Yes": 1.1207584582255907e-07}, "ground_truth": 0}, {"key": "40434901", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.970551468312272, "res": {"Yes": 0.970551468312272, "No": 0.0294483145918791}, "ground_truth": 1}, {"key": "40434901", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9533195518902658, "res": {"Yes": 0.9533195518902658, "No": 0.04668028808948631}, "ground_truth": 0}, {"key": "40434901", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999870009379513, "res": {"Yes": 0.999870009379513, "No": 0.00012986189738324868}, "ground_truth": 0}, {"key": "37680058", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.26391106700333583, "res": {"No": 0.7360881929899477, "Yes": 0.26391106700333583}, "ground_truth": 0}, {"key": "37680058", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999523644646081, "res": {"Yes": 0.9999523644646081, "No": 4.754922822092361e-05}, "ground_truth": 0}, {"key": "37680058", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.832550543706287e-06}, "ground_truth": 1}, {"key": "37680058", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.491825124973224e-06}, "ground_truth": 0}, {"key": "37680058", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9824441398200726, "res": {"Yes": 0.9824441398200726, "No": 0.01755541727610455}, "ground_truth": 0}, {"key": "37291821", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 9.987971889280466e-06, "res": {"No": 0.9999899125338788, "Yes": 9.987971889280466e-06}, "ground_truth": 0}, {"key": "37291821", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999934886141991, "res": {"Yes": 0.9999934886141991, "No": 6.468088070500804e-06}, "ground_truth": 0}, {"key": "37291821", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999160097413793, "res": {"Yes": 0.9999160097413793, "No": 8.394304708224241e-05}, "ground_truth": 1}, {"key": "37291821", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999683371212795, "res": {"Yes": 0.9999683371212795, "No": 3.159476865400611e-05}, "ground_truth": 0}, {"key": "37291821", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988915939362947, "res": {"Yes": 0.9988915939362947, "No": 0.0011083603698392823}, "ground_truth": 0}, {"key": "41002743", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.998278818035523, "res": {"Yes": 0.998278818035523, "No": 0.0017211871891557814}, "ground_truth": 0}, {"key": "41002743", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.699686564953178e-08}, "ground_truth": 0}, {"key": "41002743", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999930118027176, "res": {"Yes": 0.9999930118027176, "No": 6.912957561231187e-06}, "ground_truth": 1}, {"key": "41002743", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 3.097188436383606e-08}, "ground_truth": 0}, {"key": "41002743", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 4.452259606088743e-08}, "ground_truth": 0}, {"key": "36322869", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9911663539571992, "res": {"Yes": 0.9911663539571992, "No": 0.008833548292095084}, "ground_truth": 0}, {"key": "36322869", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999416367032035, "res": {"Yes": 0.9999416367032035, "No": 5.7204842732880424e-05}, "ground_truth": 0}, {"key": "36322869", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999936078174301, "res": {"Yes": 0.9999936078174301, "No": 6.029195333461516e-06}, "ground_truth": 1}, {"key": "36322869", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997211837564752, "res": {"Yes": 0.9997211837564752, "No": 0.00027871160070429393}, "ground_truth": 0}, {"key": "36322869", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995538183186646, "res": {"Yes": 0.9995538183186646, "No": 0.00044605646982575004}, "ground_truth": 0}, {"key": "39459717", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9916845036086754, "res": {"Yes": 0.9916845036086754, "No": 0.008315145233862547}, "ground_truth": 0}, {"key": "39459717", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997190387070186, "res": {"Yes": 0.9997190387070186, "No": 0.00028084967392936046}, "ground_truth": 0}, {"key": "39459717", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998852582493383, "res": {"Yes": 0.9998852582493383, "No": 0.00011470545225976284}, "ground_truth": 1}, {"key": "39459717", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999975489140396, "res": {"Yes": 0.999975489140396, "No": 2.4390398675770215e-05}, "ground_truth": 0}, {"key": "36503727", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999590395729012, "res": {"Yes": 0.9999590395729012, "No": 4.0827037998440813e-05}, "ground_truth": 0}, {"key": "36503727", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.723978253123222e-06}, "ground_truth": 0}, {"key": "36503727", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.5307312432050294e-06}, "ground_truth": 1}, {"key": "36503727", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.2689805891524652e-07}, "ground_truth": 0}, {"key": "36503727", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999847868417213, "res": {"Yes": 0.9999847868417213, "No": 1.5137900378319939e-05}, "ground_truth": 0}, {"key": "35682367", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998154300351321, "res": {"Yes": 0.9998154300351321, "No": 0.00018453673658914605}, "ground_truth": 0}, {"key": "35682367", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.2375412620360478e-06}, "ground_truth": 0}, {"key": "35682367", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 8.963024662419848e-08}, "ground_truth": 1}, {"key": "35682367", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0065369239894762e-07}, "ground_truth": 0}, {"key": "35682367", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.03914946658504e-06}, "ground_truth": 0}, {"key": "36472353", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5279915495732859, "res": {"Yes": 0.5279915495732859, "No": 0.472006652130062}, "ground_truth": 0}, {"key": "36472353", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999709595226828, "res": {"Yes": 0.9999709595226828, "No": 2.8896892343343442e-05}, "ground_truth": 0}, {"key": "36472353", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 4.554767825051812e-06}, "ground_truth": 1}, {"key": "36472353", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.5069943311482208e-06}, "ground_truth": 0}, {"key": "36472353", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996709061132767, "res": {"Yes": 0.9996709061132767, "No": 0.0003288986201915046}, "ground_truth": 0}, {"key": "37651907", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999917005724405, "res": {"Yes": 0.9999917005724405, "No": 8.211606316364804e-06}, "ground_truth": 0}, {"key": "37651907", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999560596094073, "res": {"Yes": 0.9999560596094073, "No": 4.376866095443255e-05}, "ground_truth": 0}, {"key": "37651907", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998497475990228, "res": {"Yes": 0.9998497475990228, "No": 0.00015015672139520594}, "ground_truth": 1}, {"key": "37651907", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999144602247352, "res": {"Yes": 0.9999144602247352, "No": 8.540541941424522e-05}, "ground_truth": 0}, {"key": "37651907", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9943987873156556, "res": {"Yes": 0.9943987873156556, "No": 0.00560120010352503}, "ground_truth": 0}, {"key": "36255476", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9987275981905139, "res": {"Yes": 0.9987275981905139, "No": 0.0012723763311489498}, "ground_truth": 0}, {"key": "36255476", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.99986464592119, "res": {"Yes": 0.99986464592119, "No": 0.00013526564124650796}, "ground_truth": 0}, {"key": "36255476", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971537752000016, "res": {"Yes": 0.9971537752000016, "No": 0.002846133137043006}, "ground_truth": 1}, {"key": "36255476", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999688139202959, "res": {"Yes": 0.9999688139202959, "No": 3.108214560306414e-05}, "ground_truth": 0}, {"key": "36255476", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998349723485612, "res": {"Yes": 0.9998349723485612, "No": 0.00016496223319338623}, "ground_truth": 0}, {"key": "37283518", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.08177336497082584, "res": {"No": 0.9182254374003002, "Yes": 0.08177336497082584}, "ground_truth": 0}, {"key": "37283518", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.760640410801575e-07}, "ground_truth": 0}, {"key": "37283518", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.9170768615485987e-06}, "ground_truth": 1}, {"key": "37283518", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.1401646148331456e-07}, "ground_truth": 0}, {"key": "37283518", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.050673753679767e-07}, "ground_truth": 0}, {"key": "34906785", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998673872338504, "res": {"Yes": 0.9998673872338504, "No": 0.00013257089487087186}, "ground_truth": 0}, {"key": "34906785", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.6187747856780524e-06}, "ground_truth": 0}, {"key": "34906785", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999895549275502, "res": {"Yes": 0.9999895549275502, "No": 1.0395796668548039e-05}, "ground_truth": 1}, {"key": "34906785", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999853828508316, "res": {"Yes": 0.9999853828508316, "No": 1.4540960920139684e-05}, "ground_truth": 0}, {"key": "34906785", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999732243284747, "res": {"Yes": 0.9999732243284747, "No": 2.669799737640724e-05}, "ground_truth": 0}, {"key": "34965328", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9846550048940761, "res": {"Yes": 0.9846550048940761, "No": 0.015344866359602603}, "ground_truth": 0}, {"key": "34965328", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.997841837189459, "res": {"Yes": 0.997841837189459, "No": 0.002158180010918931}, "ground_truth": 0}, {"key": "34965328", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9725958577175314, "res": {"Yes": 0.9725958577175314, "No": 0.027404024412630915}, "ground_truth": 1}, {"key": "34965328", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996564909727406, "res": {"Yes": 0.9996564909727406, "No": 0.00034340724907912066}, "ground_truth": 0}, {"key": "34965328", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.986937715897808, "res": {"Yes": 0.986937715897808, "No": 0.013062175391204361}, "ground_truth": 0}, {"key": "38788440", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999278099798249, "res": {"Yes": 0.9999278099798249, "No": 7.205794290691259e-05}, "ground_truth": 0}, {"key": "38788440", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998124504896088, "res": {"Yes": 0.9998124504896088, "No": 0.00018750369614026926}, "ground_truth": 0}, {"key": "38788440", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 4.200840221267847e-06}, "ground_truth": 1}, {"key": "38788440", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 4.000784864368974e-06}, "ground_truth": 0}, {"key": "38788440", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999894357248024, "res": {"Yes": 0.9999894357248024, "No": 1.0438475615623548e-05}, "ground_truth": 0}, {"key": "35046866", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.024442447658506405, "res": {"No": 0.9755573063761405, "Yes": 0.024442447658506405}, "ground_truth": 0}, {"key": "35046866", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981204874000995, "res": {"Yes": 0.9981204874000995, "No": 0.0018794577022533555}, "ground_truth": 0}, {"key": "35046866", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994920140685651, "res": {"Yes": 0.9994920140685651, "No": 0.0005079431632557624}, "ground_truth": 1}, {"key": "35046866", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996049131873577, "res": {"Yes": 0.9996049131873577, "No": 0.0003950295443647882}, "ground_truth": 0}, {"key": "35046866", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986354581350324, "res": {"Yes": 0.9986354581350324, "No": 0.001364517626473617}, "ground_truth": 0}, {"key": "37629558", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999856212553752, "res": {"Yes": 0.9999856212553752, "No": 1.4245521234291097e-05}, "ground_truth": 0}, {"key": "37629558", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9869359650718529, "res": {"Yes": 0.9869359650718529, "No": 0.01306399377257523}, "ground_truth": 0}, {"key": "37629558", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998487941127626, "res": {"Yes": 0.9998487941127626, "No": 0.0001510821242178657}, "ground_truth": 1}, {"key": "37629558", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.7311087190866537e-07}, "ground_truth": 0}, {"key": "37629558", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.5033531683642955e-06}, "ground_truth": 0}, {"key": "33859914", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9484177853136385, "res": {"Yes": 0.9484177853136385, "No": 0.05158206213374291}, "ground_truth": 0}, {"key": "33859914", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934914694385523, "res": {"Yes": 0.9934914694385523, "No": 0.006508463969419657}, "ground_truth": 0}, {"key": "33859914", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9391427949840792, "res": {"Yes": 0.9391427949840792, "No": 0.060857117404748466}, "ground_truth": 1}, {"key": "33859914", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993269906724401, "res": {"Yes": 0.9993269906724401, "No": 0.0006729429510771556}, "ground_truth": 0}, {"key": "33859914", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9606408741607939, "res": {"Yes": 0.9606408741607939, "No": 0.039358933626018416}, "ground_truth": 0}, {"key": "39790523", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996996178037684, "res": {"Yes": 0.9996996178037684, "No": 0.0003003722557587935}, "ground_truth": 0}, {"key": "39790523", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998732274763156, "res": {"Yes": 0.9998732274763156, "No": 0.00012674656708522498}, "ground_truth": 0}, {"key": "39790523", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.421348856261368e-07}, "ground_truth": 1}, {"key": "39790523", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999869324773808, "res": {"Yes": 0.9999869324773808, "No": 1.298402243315113e-05}, "ground_truth": 0}, {"key": "39790523", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997901638386876, "res": {"Yes": 0.9997901638386876, "No": 0.00020974312295914794}, "ground_truth": 0}, {"key": "33509656", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.99998752848777, "res": {"Yes": 0.99998752848777, "No": 1.237141485710385e-05}, "ground_truth": 0}, {"key": "33509656", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7324505565203784, "res": {"Yes": 0.7324505565203784, "No": 0.2675494419514335}, "ground_truth": 0}, {"key": "33509656", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.6567260540061547e-06}, "ground_truth": 1}, {"key": "33509656", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.005483332305932e-07}, "ground_truth": 0}, {"key": "33509656", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999406831293152, "res": {"Yes": 0.9999406831293152, "No": 5.928963834662889e-05}, "ground_truth": 0}, {"key": "17380923", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9870378107682467, "res": {"Yes": 0.9870378107682467, "No": 0.012962038613552374}, "ground_truth": 0}, {"key": "17380923", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1985884322741208e-06}, "ground_truth": 0}, {"key": "17380923", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.689366086279711e-07}, "ground_truth": 1}, {"key": "17380923", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998852582493383, "res": {"Yes": 0.9998852582493383, "No": 0.00011460043017295191}, "ground_truth": 0}, {"key": "17380923", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999970244320709, "res": {"Yes": 0.999970244320709, "No": 2.9623339817348506e-05}, "ground_truth": 0}, {"key": "36202526", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9857019608655505, "res": {"Yes": 0.9857019608655505, "No": 0.014297941544712143}, "ground_truth": 0}, {"key": "36202526", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9909785338367736, "res": {"Yes": 0.9909785338367736, "No": 0.009021334561784972}, "ground_truth": 0}, {"key": "36202526", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989114534970476, "res": {"Yes": 0.9989114534970476, "No": 0.0010885046558397652}, "ground_truth": 1}, {"key": "36202526", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977351481560873, "res": {"Yes": 0.9977351481560873, "No": 0.0022648781916822245}, "ground_truth": 0}, {"key": "36202526", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997462025420252, "res": {"Yes": 0.9997462025420252, "No": 0.00025376345611236966}, "ground_truth": 0}, {"key": "26419232", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.08783422042539187, "res": {"No": 0.9121654781747272, "Yes": 0.08783422042539187}, "ground_truth": 0}, {"key": "26419232", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999891973193493, "res": {"Yes": 0.9999891973193493, "No": 1.0675384294636363e-05}, "ground_truth": 0}, {"key": "26419232", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999678603234905, "res": {"Yes": 0.9999678603234905, "No": 3.2062546010787716e-05}, "ground_truth": 1}, {"key": "26419232", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999577995169098, "res": {"Yes": 0.999577995169098, "No": 0.0004218853345189528}, "ground_truth": 0}, {"key": "26419232", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999944422379444, "res": {"Yes": 0.9999944422379444, "No": 5.471356408319235e-06}, "ground_truth": 0}, {"key": "34232398", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9959957664089463, "res": {"Yes": 0.9959957664089463, "No": 0.004004229906387033}, "ground_truth": 0}, {"key": "34232398", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992273589409412, "res": {"Yes": 0.9992273589409412, "No": 0.0007726191666915553}, "ground_truth": 0}, {"key": "34232398", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1999796015777592e-06}, "ground_truth": 1}, {"key": "34232398", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957771559867127, "res": {"Yes": 0.9957771559867127, "No": 0.00422283574129169}, "ground_truth": 0}, {"key": "34232398", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.1029203857886815e-06}, "ground_truth": 0}, {"key": "33586045", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.656925695556019e-06, "res": {"No": 0.9999921773835968, "Yes": 7.656925695556019e-06}, "ground_truth": 0}, {"key": "33586045", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989866377986143, "res": {"Yes": 0.9989866377986143, "No": 0.0010132898138630832}, "ground_truth": 0}, {"key": "33586045", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975059321682126, "res": {"Yes": 0.9975059321682126, "No": 0.0024940734038005796}, "ground_truth": 1}, {"key": "33586045", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999157713474321, "res": {"Yes": 0.9999157713474321, "No": 8.409848545550759e-05}, "ground_truth": 0}, {"key": "33586045", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998389054171261, "res": {"Yes": 0.9998389054171261, "No": 0.00016097684873444683}, "ground_truth": 0}, {"key": "32281151", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.002535265281194868, "res": {"No": 0.9974647751245387, "Yes": 0.002535265281194868}, "ground_truth": 0}, {"key": "32281151", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988155907669478, "res": {"Yes": 0.9988155907669478, "No": 0.0011844031525653068}, "ground_truth": 0}, {"key": "32281151", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999323394050813, "res": {"Yes": 0.9999323394050813, "No": 6.761687336977533e-05}, "ground_truth": 1}, {"key": "32281151", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948222686014101, "res": {"Yes": 0.9948222686014101, "No": 0.005177727632209807}, "ground_truth": 0}, {"key": "32281151", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9501501293294579, "res": {"Yes": 0.9501501293294579, "No": 0.049849819671654506}, "ground_truth": 0}, {"key": "37308159", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.02586759116978076, "res": {"No": 0.9741314456295604, "Yes": 0.02586759116978076}, "ground_truth": 0}, {"key": "37308159", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0012356180360857992, "res": {"No": 0.9987627979662713, "Yes": 0.0012356180360857992}, "ground_truth": 0}, {"key": "37308159", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 5.2031461161423654e-05, "res": {"No": 0.9999477157538689, "Yes": 5.2031461161423654e-05}, "ground_truth": 1}, {"key": "37308159", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.000673278833405414, "res": {"No": 0.9993265141934445, "Yes": 0.000673278833405414}, "ground_truth": 0}, {"key": "37308159", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 4.8898496567061475e-06, "res": {"No": 0.9999949190499081, "Yes": 4.8898496567061475e-06}, "ground_truth": 0}, {"key": "35694408", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9993448447080114, "res": {"Yes": 0.9993448447080114, "No": 0.0006550630941641494}, "ground_truth": 0}, {"key": "35694408", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999614235510903, "res": {"Yes": 0.9999614235510903, "No": 3.8520422606561335e-05}, "ground_truth": 0}, {"key": "35694408", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999614235510903, "res": {"Yes": 0.9999614235510903, "No": 3.834551446863229e-05}, "ground_truth": 1}, {"key": "35694408", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998472483677188, "res": {"Yes": 0.9998472483677188, "No": 0.0001526641724981527}, "ground_truth": 0}, {"key": "35694408", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999578475844381, "res": {"Yes": 0.9999578475844381, "No": 4.2068092604663424e-05}, "ground_truth": 0}, {"key": "39781995", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9817443878657456, "res": {"Yes": 0.9817443878657456, "No": 0.0182556620090331}, "ground_truth": 0}, {"key": "39781995", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999857404566682, "res": {"Yes": 0.9999857404566682, "No": 1.4216632158518126e-05}, "ground_truth": 0}, {"key": "39781995", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945443879005372, "res": {"Yes": 0.9945443879005372, "No": 0.00545551257422798}, "ground_truth": 1}, {"key": "39781995", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999479541484323, "res": {"Yes": 0.9999479541484323, "No": 5.193056604569582e-05}, "ground_truth": 0}, {"key": "39781995", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930874618667309, "res": {"Yes": 0.9930874618667309, "No": 0.006912527042676985}, "ground_truth": 0}, {"key": "22799372", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9969523149497272, "res": {"Yes": 0.9969523149497272, "No": 0.003047508217926543}, "ground_truth": 0}, {"key": "22799372", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9909249233482698, "res": {"Yes": 0.9909249233482698, "No": 0.009074896534983156}, "ground_truth": 0}, {"key": "22799372", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999684563215123, "res": {"Yes": 0.9999684563215123, "No": 3.14880326560464e-05}, "ground_truth": 1}, {"key": "22799372", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999557865770158, "res": {"Yes": 0.999557865770158, "No": 0.00044204383791566284}, "ground_truth": 0}, {"key": "22799372", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923212709345528, "res": {"Yes": 0.9923212709345528, "No": 0.007678680933687286}, "ground_truth": 0}, {"key": "37428240", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998439111831272, "res": {"Yes": 0.9998439111831272, "No": 0.0001560492336789516}, "ground_truth": 0}, {"key": "37428240", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999765619466755, "res": {"Yes": 0.9999765619466755, "No": 2.3351671309193036e-05}, "ground_truth": 0}, {"key": "37428240", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999820452021894, "res": {"Yes": 0.9999820452021894, "No": 1.788513745128331e-05}, "ground_truth": 1}, {"key": "37428240", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.6849477559694545e-06}, "ground_truth": 0}, {"key": "37428240", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996463658732931, "res": {"Yes": 0.9996463658732931, "No": 0.0003536092740598136}, "ground_truth": 0}, {"key": "40612657", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9976079520173244, "res": {"Yes": 0.9976079520173244, "No": 0.0023920479430823123}, "ground_truth": 0}, {"key": "40612657", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993618687923786, "res": {"Yes": 0.9993618687923786, "No": 0.0006381173227932426}, "ground_truth": 0}, {"key": "40612657", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999272140090287, "res": {"Yes": 0.9999272140090287, "No": 7.27392812163624e-05}, "ground_truth": 1}, {"key": "40612657", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985561671310614, "res": {"Yes": 0.9985561671310614, "No": 0.001443816390484886}, "ground_truth": 0}, {"key": "40612657", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.954656909612411, "res": {"Yes": 0.954656909612411, "No": 0.04534300314460478}, "ground_truth": 0}, {"key": "34404662", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.06930293649630248, "res": {"No": 0.9306969824804843, "Yes": 0.06930293649630248}, "ground_truth": 0}, {"key": "34404662", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8024121583252449, "res": {"Yes": 0.8024121583252449, "No": 0.19758774792872674}, "ground_truth": 1}, {"key": "34404662", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4731341809805465, "res": {"No": 0.5268658062305477, "Yes": 0.4731341809805465}, "ground_truth": 0}, {"key": "34404662", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0385585736626672, "res": {"No": 0.9614413252464629, "Yes": 0.0385585736626672}, "ground_truth": 0}, {"key": "32619704", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9408300513609236, "res": {"Yes": 0.9408300513609236, "No": 0.05916988889621275}, "ground_truth": 0}, {"key": "32619704", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.3630167146171819, "res": {"No": 0.636982981759489, "Yes": 0.3630167146171819}, "ground_truth": 0}, {"key": "32619704", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989538044445656, "res": {"Yes": 0.9989538044445656, "No": 0.0010461784767778449}, "ground_truth": 1}, {"key": "32619704", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9764208897774594, "res": {"Yes": 0.9764208897774594, "No": 0.023578995133135185}, "ground_truth": 0}, {"key": "32619704", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6650824573487468, "res": {"Yes": 0.6650824573487468, "No": 0.3349172475201572}, "ground_truth": 0}, {"key": "39014883", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999974535534225, "res": {"Yes": 0.999974535534225, "No": 2.539063887750654e-05}, "ground_truth": 0}, {"key": "39014883", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997753894687232, "res": {"Yes": 0.9997753894687232, "No": 0.00022447864001676}, "ground_truth": 0}, {"key": "39014883", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997040270286442, "res": {"Yes": 0.9997040270286442, "No": 0.000295942612241322}, "ground_truth": 1}, {"key": "39014883", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999729859278866, "res": {"Yes": 0.9999729859278866, "No": 2.6925762928578752e-05}, "ground_truth": 0}, {"key": "39014883", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998510586328669, "res": {"Yes": 0.9998510586328669, "No": 0.00014882543738612913}, "ground_truth": 0}, {"key": "37982812", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6166845993337596, "res": {"Yes": 0.6166845993337596, "No": 0.38331445321059304}, "ground_truth": 0}, {"key": "37982812", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8690376578157721, "res": {"Yes": 0.8690376578157721, "No": 0.13096200971700642}, "ground_truth": 0}, {"key": "37982812", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9825391880285499, "res": {"Yes": 0.9825391880285499, "No": 0.017460803677340004}, "ground_truth": 1}, {"key": "37982812", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999228788286739, "res": {"Yes": 0.999228788286739, "No": 0.0007710803303576409}, "ground_truth": 0}, {"key": "37982812", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7534183407850501, "res": {"Yes": 0.7534183407850501, "No": 0.24658147813853026}, "ground_truth": 0}, {"key": "28123476", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 8.179773534563523e-05, "res": {"No": 0.999918155229466, "Yes": 8.179773534563523e-05}, "ground_truth": 0}, {"key": "28123476", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999780871612133, "res": {"Yes": 0.999780871612133, "No": 0.0002190141978245669}, "ground_truth": 0}, {"key": "28123476", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9248295950575436, "res": {"Yes": 0.9248295950575436, "No": 0.07517030547592243}, "ground_truth": 1}, {"key": "28123476", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996065813694129, "res": {"Yes": 0.9996065813694129, "No": 0.0003933175331252739}, "ground_truth": 0}, {"key": "28123476", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9328122695590689, "res": {"Yes": 0.9328122695590689, "No": 0.06718766532580758}, "ground_truth": 0}, {"key": "39078849", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.281675606991099, "res": {"No": 0.7183241423978716, "Yes": 0.281675606991099}, "ground_truth": 0}, {"key": "39078849", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953665716028414, "res": {"Yes": 0.9953665716028414, "No": 0.004633471072860332}, "ground_truth": 0}, {"key": "39078849", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9604619253566392, "res": {"Yes": 0.9604619253566392, "No": 0.039537883134084094}, "ground_truth": 1}, {"key": "39078849", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938989961696343, "res": {"Yes": 0.9938989961696343, "No": 0.00610093646947336}, "ground_truth": 0}, {"key": "39078849", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9909909469116314, "res": {"Yes": 0.9909909469116314, "No": 0.009008794608968657}, "ground_truth": 0}, {"key": "39414137", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992091459900103, "res": {"Yes": 0.9992091459900103, "No": 0.0007907731659761922}, "ground_truth": 0}, {"key": "39414137", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966357024739668, "res": {"Yes": 0.9966357024739668, "No": 0.0033642090349298307}, "ground_truth": 0}, {"key": "39414137", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999796611748367, "res": {"Yes": 0.9999796611748367, "No": 2.019662594367361e-05}, "ground_truth": 1}, {"key": "39414137", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997815866856616, "res": {"Yes": 0.9997815866856616, "No": 0.00021833193840549656}, "ground_truth": 0}, {"key": "39414137", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998934823934031, "res": {"Yes": 0.9998934823934031, "No": 0.0001063875499128983}, "ground_truth": 0}, {"key": "37371354", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0013120375085959892, "res": {"No": 0.9986878869740339, "Yes": 0.0013120375085959892}, "ground_truth": 0}, {"key": "37371354", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975328621993768, "res": {"Yes": 0.9975328621993768, "No": 0.0024668900764689768}, "ground_truth": 0}, {"key": "37371354", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999909853566321, "res": {"Yes": 0.9999909853566321, "No": 8.909298957431272e-06}, "ground_truth": 1}, {"key": "37371354", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6829487628598943, "res": {"Yes": 0.6829487628598943, "No": 0.3170510744323049}, "ground_truth": 0}, {"key": "37371354", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9913267019601983, "res": {"Yes": 0.9913267019601983, "No": 0.008673221400923735}, "ground_truth": 0}, {"key": "29497179", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998107819639911, "res": {"Yes": 0.9998107819639911, "No": 0.00018911921018859495}, "ground_truth": 0}, {"key": "29497179", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997516845353897, "res": {"Yes": 0.9997516845353897, "No": 0.0002481911109242741}, "ground_truth": 1}, {"key": "29497179", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9919101156761053, "res": {"Yes": 0.9919101156761053, "No": 0.008089858813837952}, "ground_truth": 0}, {"key": "29497179", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9901947886453908, "res": {"Yes": 0.9901947886453908, "No": 0.009805115889253048}, "ground_truth": 0}, {"key": "35908694", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9994703413187088, "res": {"Yes": 0.9994703413187088, "No": 0.0005296002930497265}, "ground_truth": 0}, {"key": "35908694", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999406831293152, "res": {"Yes": 0.9999406831293152, "No": 5.9229252976724826e-05}, "ground_truth": 0}, {"key": "35908694", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999889589149532, "res": {"Yes": 0.9999889589149532, "No": 1.0928407131708291e-05}, "ground_truth": 1}, {"key": "35908694", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999801379802525, "res": {"Yes": 0.9999801379802525, "No": 1.9752430715255467e-05}, "ground_truth": 0}, {"key": "35908694", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991464213057313, "res": {"Yes": 0.9991464213057313, "No": 0.0008534701414476589}, "ground_truth": 0}, {"key": "37619358", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5746520567722095, "res": {"Yes": 0.5746520567722095, "No": 0.4253477965056572}, "ground_truth": 0}, {"key": "37619358", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953052748353652, "res": {"Yes": 0.9953052748353652, "No": 0.004694763219852269}, "ground_truth": 0}, {"key": "37619358", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9230320604811788, "res": {"Yes": 0.9230320604811788, "No": 0.07696758652310143}, "ground_truth": 1}, {"key": "37619358", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9742845307240915, "res": {"Yes": 0.9742845307240915, "No": 0.025715308575557864}, "ground_truth": 0}, {"key": "37619358", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9851340605400284, "res": {"Yes": 0.9851340605400284, "No": 0.014865919020347102}, "ground_truth": 0}, {"key": "37293103", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9598631852894677, "res": {"Yes": 0.9598631852894677, "No": 0.04013670607070275}, "ground_truth": 0}, {"key": "37293103", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968180906377506, "res": {"Yes": 0.9968180906377506, "No": 0.0031819374713441123}, "ground_truth": 0}, {"key": "37293103", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999511724841019, "res": {"Yes": 0.9999511724841019, "No": 4.8769911640657624e-05}, "ground_truth": 1}, {"key": "37293103", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 7.1480497550215445e-06}, "ground_truth": 0}, {"key": "37293103", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992594976073219, "res": {"Yes": 0.9992594976073219, "No": 0.0007404129402800352}, "ground_truth": 0}, {"key": "36883729", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.735861911016992e-06, "res": {"No": 0.9999958726752174, "Yes": 3.735861911016992e-06}, "ground_truth": 0}, {"key": "36883729", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998803714760626, "res": {"Yes": 0.9998803714760626, "No": 0.00011952703235489147}, "ground_truth": 0}, {"key": "36883729", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999961781147359, "res": {"Yes": 0.999961781147359, "No": 3.8184801042307944e-05}, "ground_truth": 1}, {"key": "36883729", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999909096527971, "res": {"Yes": 0.999909096527971, "No": 9.07582663238153e-05}, "ground_truth": 0}, {"key": "36883729", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9898691324853784, "res": {"Yes": 0.9898691324853784, "No": 0.010129634928687741}, "ground_truth": 0}, {"key": "39209521", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9957486666123646, "res": {"Yes": 0.9957486666123646, "No": 0.004251112353737638}, "ground_truth": 0}, {"key": "39209521", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997950501750854, "res": {"Yes": 0.9997950501750854, "No": 0.00020474853301882188}, "ground_truth": 0}, {"key": "39209521", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999663107274963, "res": {"Yes": 0.9999663107274963, "No": 3.3596219910086934e-05}, "ground_truth": 1}, {"key": "39209521", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993821133706129, "res": {"Yes": 0.9993821133706129, "No": 0.0006178321743047873}, "ground_truth": 0}, {"key": "39209521", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958086028271038, "res": {"Yes": 0.9958086028271038, "No": 0.004191242708001847}, "ground_truth": 0}, {"key": "27792571", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.13580586177836168, "res": {"No": 0.8641938257315919, "Yes": 0.13580586177836168}, "ground_truth": 0}, {"key": "27792571", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.19528918870598652, "res": {"No": 0.8047099234848284, "Yes": 0.19528918870598652}, "ground_truth": 0}, {"key": "27792571", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3296333838906195, "res": {"No": 0.6703659310131859, "Yes": 0.3296333838906195}, "ground_truth": 1}, {"key": "27792571", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8776791367783607, "res": {"Yes": 0.8776791367783607, "No": 0.12232028345996636}, "ground_truth": 0}, {"key": "27792571", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0032177809595207237, "res": {"No": 0.9967819633153369, "Yes": 0.0032177809595207237}, "ground_truth": 0}, {"key": "39755647", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9839472510729023, "res": {"Yes": 0.9839472510729023, "No": 0.016052687466942325}, "ground_truth": 0}, {"key": "39755647", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999696483206215, "res": {"Yes": 0.9999696483206215, "No": 3.0259723110285828e-05}, "ground_truth": 0}, {"key": "39755647", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999983356420506, "res": {"Yes": 0.999983356420506, "No": 1.6506538216512873e-05}, "ground_truth": 1}, {"key": "39755647", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.5521352412795607e-06}, "ground_truth": 0}, {"key": "39755647", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996296944000496, "res": {"Yes": 0.9996296944000496, "No": 0.0003702905093491781}, "ground_truth": 0}, {"key": "40800537", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.752508685675354, "res": {"Yes": 0.752508685675354, "No": 0.24749116365865756}, "ground_truth": 0}, {"key": "40800537", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995702575457518, "res": {"Yes": 0.9995702575457518, "No": 0.00042966165836936593}, "ground_truth": 0}, {"key": "40800537", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999869324773808, "res": {"Yes": 0.9999869324773808, "No": 1.3014121771155431e-05}, "ground_truth": 1}, {"key": "40800537", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999751315392253, "res": {"Yes": 0.9999751315392253, "No": 2.4813182341144196e-05}, "ground_truth": 0}, {"key": "40800537", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9831501842192931, "res": {"Yes": 0.9831501842192931, "No": 0.016849757735538082}, "ground_truth": 0}, {"key": "14171461", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.390269250807495e-06, "res": {"No": 0.999995276659155, "Yes": 4.390269250807495e-06}, "ground_truth": 0}, {"key": "14171461", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.4477875400566196, "res": {"No": 0.5522123580904984, "Yes": 0.4477875400566196}, "ground_truth": 0}, {"key": "14171461", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986799180757021, "res": {"Yes": 0.9986799180757021, "No": 0.0013199790681126574}, "ground_truth": 1}, {"key": "14171461", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9629424212396184, "res": {"Yes": 0.9629424212396184, "No": 0.03705750809989061}, "ground_truth": 0}, {"key": "14171461", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 8.112194837340616e-05, "res": {"No": 0.9999186320055549, "Yes": 8.112194837340616e-05}, "ground_truth": 0}, {"key": "36892440", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.27421086178547205, "res": {"No": 0.7257889854902358, "Yes": 0.27421086178547205}, "ground_truth": 0}, {"key": "36892440", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9662086058482263, "res": {"Yes": 0.9662086058482263, "No": 0.033791313721805605}, "ground_truth": 0}, {"key": "36892440", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1881350919501675, "res": {"No": 0.8118647067545623, "Yes": 0.1881350919501675}, "ground_truth": 1}, {"key": "36892440", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.05089129966482482, "res": {"No": 0.9491082086549425, "Yes": 0.05089129966482482}, "ground_truth": 0}, {"key": "36892440", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.03101012236166327, "res": {"No": 0.9689897645955963, "Yes": 0.03101012236166327}, "ground_truth": 0}, {"key": "33733410", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.28730506025857044, "res": {"No": 0.7126944655313312, "Yes": 0.28730506025857044}, "ground_truth": 0}, {"key": "33733410", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9887110666591914, "res": {"Yes": 0.9887110666591914, "No": 0.011288840585800652}, "ground_truth": 0}, {"key": "33733410", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999377109936612, "res": {"Yes": 0.999377109936612, "No": 0.0006227982050957462}, "ground_truth": 1}, {"key": "33733410", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.34136451967127307, "res": {"No": 0.6586352024784214, "Yes": 0.34136451967127307}, "ground_truth": 0}, {"key": "33733410", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7185866372561788, "res": {"Yes": 0.7185866372561788, "No": 0.28141329204645793}, "ground_truth": 0}, {"key": "38587765", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9978478927108063, "res": {"Yes": 0.9978478927108063, "No": 0.0021520680832179105}, "ground_truth": 0}, {"key": "38587765", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 3.256391101073951e-06}, "ground_truth": 0}, {"key": "38587765", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.555751265598603e-06}, "ground_truth": 1}, {"key": "38587765", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999709595226828, "res": {"Yes": 0.9999709595226828, "No": 2.8982220563728842e-05}, "ground_truth": 0}, {"key": "38587765", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 6.136144730884559e-07}, "ground_truth": 0}, {"key": "41065582", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.001048628247384637, "res": {"No": 0.9989503547633491, "Yes": 0.001048628247384637}, "ground_truth": 0}, {"key": "41065582", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983933524422772, "res": {"Yes": 0.9983933524422772, "No": 0.001606467355301213}, "ground_truth": 0}, {"key": "41065582", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.001978832410260311, "res": {"No": 0.9980202749458775, "Yes": 0.001978832410260311}, "ground_truth": 1}, {"key": "41065582", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9554582315188337, "res": {"Yes": 0.9554582315188337, "No": 0.04454152405806518}, "ground_truth": 0}, {"key": "41065582", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8956230335676407, "res": {"Yes": 0.8956230335676407, "No": 0.10437628338317552}, "ground_truth": 0}, {"key": "34713891", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982242979277464, "res": {"Yes": 0.9982242979277464, "No": 0.0017756530646067165}, "ground_truth": 0}, {"key": "34713891", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991250088282225, "res": {"Yes": 0.9991250088282225, "No": 0.0008748815956677421}, "ground_truth": 1}, {"key": "34713891", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999468813708443, "res": {"Yes": 0.9999468813708443, "No": 5.3049134915625716e-05}, "ground_truth": 0}, {"key": "34713891", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999582999628833, "res": {"Yes": 0.999582999628833, "No": 0.0004168814788274416}, "ground_truth": 0}, {"key": "18913023", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9950601709852238, "res": {"Yes": 0.9950601709852238, "No": 0.00493986290594693}, "ground_truth": 0}, {"key": "18913023", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9919626704500704, "res": {"Yes": 0.9919626704500704, "No": 0.008037316662041412}, "ground_truth": 0}, {"key": "18913023", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997117693464384, "res": {"Yes": 0.9997117693464384, "No": 0.0002881325328999453}, "ground_truth": 1}, {"key": "18913023", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.969509718607009, "res": {"Yes": 0.969509718607009, "No": 0.030490088969400177}, "ground_truth": 0}, {"key": "18913023", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.2463413261672736, "res": {"No": 0.753658501013142, "Yes": 0.2463413261672736}, "ground_truth": 0}, {"key": "36884100", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9769912089343005, "res": {"Yes": 0.9769912089343005, "No": 0.023008646475062178}, "ground_truth": 0}, {"key": "36884100", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999789806303791, "res": {"Yes": 0.999789806303791, "No": 0.00021012184105397892}, "ground_truth": 0}, {"key": "36884100", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994603371703957, "res": {"Yes": 0.9994603371703957, "No": 0.0005395523735690481}, "ground_truth": 1}, {"key": "36884100", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999970244320709, "res": {"Yes": 0.999970244320709, "No": 2.9652414572545032e-05}, "ground_truth": 0}, {"key": "36884100", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992102180720545, "res": {"Yes": 0.9992102180720545, "No": 0.0007896977347177944}, "ground_truth": 0}, {"key": "39899913", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9893053953135855, "res": {"Yes": 0.9893053953135855, "No": 0.010694482764316653}, "ground_truth": 0}, {"key": "39899913", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.2815541972448623e-06}, "ground_truth": 0}, {"key": "39899913", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.397587286025705e-06}, "ground_truth": 1}, {"key": "39899913", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938082126122043, "res": {"Yes": 0.9938082126122043, "No": 0.006191794074189938}, "ground_truth": 0}, {"key": "39899913", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996561334956433, "res": {"Yes": 0.9996561334956433, "No": 0.0003438280088131914}, "ground_truth": 0}, {"key": "30725366", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9990199522607929, "res": {"Yes": 0.9990199522607929, "No": 0.0009800097866225295}, "ground_truth": 0}, {"key": "30725366", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993394877343295, "res": {"Yes": 0.9993394877343295, "No": 0.0006604287583505961}, "ground_truth": 0}, {"key": "30725366", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1880407930846518e-06}, "ground_truth": 1}, {"key": "30725366", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.713800933375467e-07}, "ground_truth": 0}, {"key": "30725366", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999900317366834, "res": {"Yes": 0.9999900317366834, "No": 9.719280158517765e-06}, "ground_truth": 0}, {"key": "26133523", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.95526951068271e-06, "res": {"No": 0.9999942038320978, "Yes": 4.95526951068271e-06}, "ground_truth": 0}, {"key": "26133523", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.0544592013572104e-07}, "ground_truth": 0}, {"key": "26133523", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992977050546176, "res": {"Yes": 0.9992977050546176, "No": 0.0007022135485389693}, "ground_truth": 1}, {"key": "26133523", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999247109143655, "res": {"Yes": 0.9999247109143655, "No": 7.522401722550803e-05}, "ground_truth": 0}, {"key": "26133523", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.81974494077363e-07}, "ground_truth": 0}, {"key": "29332665", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9675659964689782, "res": {"Yes": 0.9675659964689782, "No": 0.03243387103245422}, "ground_truth": 0}, {"key": "29332665", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998913369602558, "res": {"Yes": 0.9998913369602558, "No": 0.00010861609033230371}, "ground_truth": 0}, {"key": "29332665", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990195950113219, "res": {"Yes": 0.9990195950113219, "No": 0.0009803459876345185}, "ground_truth": 1}, {"key": "29332665", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 7.201530392607198e-06}, "ground_truth": 0}, {"key": "29332665", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981669289307302, "res": {"Yes": 0.9981669289307302, "No": 0.0018329749066300974}, "ground_truth": 0}, {"key": "37400481", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0062198343774822135, "res": {"No": 0.9937800716416217, "Yes": 0.0062198343774822135}, "ground_truth": 0}, {"key": "37400481", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999877668918251, "res": {"Yes": 0.9999877668918251, "No": 1.212449222946382e-05}, "ground_truth": 0}, {"key": "37400481", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999690523188893, "res": {"Yes": 0.9999690523188893, "No": 3.081127079561608e-05}, "ground_truth": 1}, {"key": "37400481", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995277320236443, "res": {"Yes": 0.9995277320236443, "No": 0.0004721732899657895}, "ground_truth": 0}, {"key": "37400481", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998334229554968, "res": {"Yes": 0.9998334229554968, "No": 0.00016650453694435092}, "ground_truth": 0}, {"key": "38787241", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999555828164723, "res": {"Yes": 0.9999555828164723, "No": 4.4375368765268045e-05}, "ground_truth": 0}, {"key": "38787241", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 7.094859856348452e-06}, "ground_truth": 0}, {"key": "38787241", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 4.09210111405732e-06}, "ground_truth": 1}, {"key": "38787241", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999894357248024, "res": {"Yes": 0.9999894357248024, "No": 1.0526970237003248e-05}, "ground_truth": 0}, {"key": "38787241", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999808531883025, "res": {"Yes": 0.9999808531883025, "No": 1.9070384042238598e-05}, "ground_truth": 0}, {"key": "38225963", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9941839302007737, "res": {"Yes": 0.9941839302007737, "No": 0.005816008949382227}, "ground_truth": 0}, {"key": "38225963", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9844735674188081, "res": {"Yes": 0.9844735674188081, "No": 0.015526081371958253}, "ground_truth": 0}, {"key": "38225963", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.368887762560894e-06}, "ground_truth": 1}, {"key": "38225963", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.539985241128581e-06}, "ground_truth": 0}, {"key": "38225963", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9836778349934888, "res": {"Yes": 0.9836778349934888, "No": 0.01632205529797015}, "ground_truth": 0}, {"key": "26072034", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.30253648415712836, "res": {"No": 0.69746259079064, "Yes": 0.30253648415712836}, "ground_truth": 0}, {"key": "26072034", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9756944920604812, "res": {"Yes": 0.9756944920604812, "No": 0.024305109092865908}, "ground_truth": 0}, {"key": "26072034", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952263920559793, "res": {"Yes": 0.9952263920559793, "No": 0.004773559623899117}, "ground_truth": 1}, {"key": "26072034", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9022807451069224, "res": {"Yes": 0.9022807451069224, "No": 0.09771881100818211}, "ground_truth": 0}, {"key": "26072034", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9748810114209309, "res": {"Yes": 0.9748810114209309, "No": 0.025118606817674465}, "ground_truth": 0}, {"key": "35690810", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0034970623202457405, "res": {"No": 0.9965028643729018, "Yes": 0.0034970623202457405}, "ground_truth": 0}, {"key": "35690810", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999471890129134, "res": {"Yes": 0.999471890129134, "No": 0.0005280356804137559}, "ground_truth": 0}, {"key": "35690810", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974880250027038, "res": {"Yes": 0.9974880250027038, "No": 0.0025119222055096156}, "ground_truth": 1}, {"key": "35690810", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994281847486902, "res": {"Yes": 0.9994281847486902, "No": 0.0005718047334234842}, "ground_truth": 0}, {"key": "35690810", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999685755197594, "res": {"Yes": 0.9999685755197594, "No": 3.13998638828671e-05}, "ground_truth": 0}, {"key": "36855665", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.009783724883631719, "res": {"No": 0.9902159433928296, "Yes": 0.009783724883631719}, "ground_truth": 0}, {"key": "36855665", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0005492556280285646, "res": {"No": 0.9994506943236344, "Yes": 0.0005492556280285646}, "ground_truth": 0}, {"key": "36855665", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9025484734712425, "res": {"Yes": 0.9025484734712425, "No": 0.09745105763337313}, "ground_truth": 1}, {"key": "36855665", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5359324242606103, "res": {"Yes": 0.5359324242606103, "No": 0.46406745971814783}, "ground_truth": 0}, {"key": "36855665", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9191309177738838, "res": {"Yes": 0.9191309177738838, "No": 0.08086889628361509}, "ground_truth": 0}, {"key": "29757662", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8799389402471453, "res": {"Yes": 0.8799389402471453, "No": 0.12006063165634395}, "ground_truth": 0}, {"key": "29757662", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999262604448946, "res": {"Yes": 0.9999262604448946, "No": 7.36578378858746e-05}, "ground_truth": 0}, {"key": "29757662", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999711979237877, "res": {"Yes": 0.9999711979237877, "No": 2.8774470641447532e-05}, "ground_truth": 1}, {"key": "29757662", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996525586317364, "res": {"Yes": 0.9996525586317364, "No": 0.000347355780238791}, "ground_truth": 0}, {"key": "29757662", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999753699393249, "res": {"Yes": 0.9999753699393249, "No": 2.4533181739821655e-05}, "ground_truth": 0}, {"key": "19134339", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994883243506565, "res": {"Yes": 0.9994883243506565, "No": 0.0005116498774247966}, "ground_truth": 0}, {"key": "19134339", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9872993092503426, "res": {"Yes": 0.9872993092503426, "No": 0.012700587839677533}, "ground_truth": 1}, {"key": "19134339", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9655023240709469, "res": {"Yes": 0.9655023240709469, "No": 0.03449767667382954}, "ground_truth": 0}, {"key": "19134339", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9912047673535976, "res": {"Yes": 0.9912047673535976, "No": 0.008795122258320793}, "ground_truth": 0}, {"key": "35360732", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9989931836800044, "res": {"Yes": 0.9989931836800044, "No": 0.0010067996926271412}, "ground_truth": 0}, {"key": "35360732", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990439750832933, "res": {"Yes": 0.9990439750832933, "No": 0.0009559365666412928}, "ground_truth": 0}, {"key": "35360732", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976304919267389, "res": {"Yes": 0.9976304919267389, "No": 0.0023694422241353277}, "ground_truth": 1}, {"key": "35360732", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996343306233494, "res": {"Yes": 0.9996343306233494, "No": 0.0003656000798413013}, "ground_truth": 0}, {"key": "35360732", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914707075788064, "res": {"Yes": 0.9914707075788064, "No": 0.008529252100881346}, "ground_truth": 0}, {"key": "37713629", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0003511233400744841, "res": {"No": 0.9996486299449218, "Yes": 0.0003511233400744841}, "ground_truth": 0}, {"key": "37713629", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.935132540681623, "res": {"Yes": 0.935132540681623, "No": 0.06486719195246599}, "ground_truth": 0}, {"key": "37713629", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995876466410251, "res": {"Yes": 0.9995876466410251, "No": 0.0004123174481842461}, "ground_truth": 1}, {"key": "37713629", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993212764870781, "res": {"Yes": 0.9993212764870781, "No": 0.0006786052290499048}, "ground_truth": 0}, {"key": "37713629", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0020490262472954027, "res": {"No": 0.9979506941005436, "Yes": 0.0020490262472954027}, "ground_truth": 0}, {"key": "33393394", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.7568568022939473e-07, "res": {"No": 0.9999994487765019, "Yes": 3.7568568022939473e-07}, "ground_truth": 0}, {"key": "33393394", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986850334272112, "res": {"Yes": 0.9986850334272112, "No": 0.001314904815003521}, "ground_truth": 0}, {"key": "33393394", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998965813680755, "res": {"Yes": 0.9998965813680755, "No": 0.00010333525009694034}, "ground_truth": 1}, {"key": "33393394", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7720462665414215, "res": {"Yes": 0.7720462665414215, "No": 0.22795366371539857}, "ground_truth": 0}, {"key": "33393394", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.996995089209132, "res": {"Yes": 0.996995089209132, "No": 0.003004845880492226}, "ground_truth": 0}, {"key": "32275837", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9101582713808768, "res": {"Yes": 0.9101582713808768, "No": 0.08984156938841455}, "ground_truth": 0}, {"key": "32275837", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999905085465441, "res": {"Yes": 0.9999905085465441, "No": 9.415182613747278e-06}, "ground_truth": 0}, {"key": "32275837", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999648803297101, "res": {"Yes": 0.9999648803297101, "No": 3.50673170306266e-05}, "ground_truth": 1}, {"key": "32275837", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.9933451998828954e-06}, "ground_truth": 0}, {"key": "32275837", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999046864026295, "res": {"Yes": 0.9999046864026295, "No": 9.510899578351138e-05}, "ground_truth": 0}, {"key": "21458094", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9968433163358487, "res": {"Yes": 0.9968433163358487, "No": 0.0031566766431767334}, "ground_truth": 0}, {"key": "21458094", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9852908442937377, "res": {"Yes": 0.9852908442937377, "No": 0.014709084248170565}, "ground_truth": 0}, {"key": "21458094", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999118379724999, "res": {"Yes": 0.9999118379724999, "No": 8.804944731382689e-05}, "ground_truth": 1}, {"key": "21458094", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998899066707154, "res": {"Yes": 0.9998899066707154, "No": 0.00010990337360314006}, "ground_truth": 0}, {"key": "21458094", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998353298996095, "res": {"Yes": 0.9998353298996095, "No": 0.00016452483936150308}, "ground_truth": 0}, {"key": "40975362", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992475934998339, "res": {"Yes": 0.9992475934998339, "No": 0.0007522905040765208}, "ground_truth": 0}, {"key": "40975362", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987563763416298, "res": {"Yes": 0.9987563763416298, "No": 0.0012436105454987159}, "ground_truth": 0}, {"key": "40975362", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999478349506435, "res": {"Yes": 0.9999478349506435, "No": 5.209721841383426e-05}, "ground_truth": 1}, {"key": "40975362", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984248398689854, "res": {"Yes": 0.9984248398689854, "No": 0.0015751323161172946}, "ground_truth": 0}, {"key": "40975362", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9737871140172767, "res": {"Yes": 0.9737871140172767, "No": 0.026212811478542423}, "ground_truth": 0}, {"key": "35234201", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.06492968914588501, "res": {"No": 0.935069884224906, "Yes": 0.06492968914588501}, "ground_truth": 0}, {"key": "35234201", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 5.762730831054125e-05, "res": {"No": 0.9999422326855956, "Yes": 5.762730831054125e-05}, "ground_truth": 0}, {"key": "35234201", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999412791111388, "res": {"Yes": 0.9999412791111388, "No": 5.866507398181451e-05}, "ground_truth": 1}, {"key": "35234201", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.25859697766749823, "res": {"No": 0.7414020672229276, "Yes": 0.25859697766749823}, "ground_truth": 0}, {"key": "35234201", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965092641341476, "res": {"Yes": 0.9965092641341476, "No": 0.0034905428864795652}, "ground_truth": 0}, {"key": "36037573", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8728027095797531, "res": {"Yes": 0.8728027095797531, "No": 0.12719706528513672}, "ground_truth": 0}, {"key": "36037573", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999214926618624, "res": {"Yes": 0.9999214926618624, "No": 7.845643934496474e-05}, "ground_truth": 0}, {"key": "36037573", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948002029918331, "res": {"Yes": 0.9948002029918331, "No": 0.00519974340398385}, "ground_truth": 1}, {"key": "36037573", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999591587700257, "res": {"Yes": 0.9999591587700257, "No": 4.0708078348100276e-05}, "ground_truth": 0}, {"key": "36037573", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998760879877276, "res": {"Yes": 0.9998760879877276, "No": 0.00012387370741003082}, "ground_truth": 0}, {"key": "30861915", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999185580969351, "res": {"Yes": 0.999185580969351, "No": 0.0008144002542296721}, "ground_truth": 0}, {"key": "30861915", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999893165220688, "res": {"Yes": 0.9999893165220688, "No": 1.058321820098439e-05}, "ground_truth": 0}, {"key": "30861915", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.046444845734128e-06}, "ground_truth": 1}, {"key": "30861915", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.1212123530637087e-06}, "ground_truth": 0}, {"key": "30861915", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999473581626301, "res": {"Yes": 0.9999473581626301, "No": 5.257478109789814e-05}, "ground_truth": 0}, {"key": "40173012", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998890723329287, "res": {"Yes": 0.9998890723329287, "No": 0.00011079576570945087}, "ground_truth": 0}, {"key": "40173012", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998412891059067, "res": {"Yes": 0.9998412891059067, "No": 0.00015848699792986174}, "ground_truth": 0}, {"key": "40173012", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999809723900273, "res": {"Yes": 0.9999809723900273, "No": 1.89014862769216e-05}, "ground_truth": 1}, {"key": "40173012", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998725123372428, "res": {"Yes": 0.9998725123372428, "No": 0.0001274543878147964}, "ground_truth": 0}, {"key": "40173012", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9854195479405283, "res": {"Yes": 0.9854195479405283, "No": 0.014580212175559522}, "ground_truth": 0}, {"key": "35100330", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00011435990742677973, "res": {"No": 0.9998854966360107, "Yes": 0.00011435990742677973}, "ground_truth": 0}, {"key": "35100330", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999818067994983, "res": {"Yes": 0.9999818067994983, "No": 1.807147192216335e-05}, "ground_truth": 0}, {"key": "35100330", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.3966151483522041e-06}, "ground_truth": 1}, {"key": "35100330", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999694099178861, "res": {"Yes": 0.9999694099178861, "No": 3.055971254378353e-05}, "ground_truth": 0}, {"key": "35100330", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.956665234396354e-06}, "ground_truth": 0}, {"key": "37220221", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3227426250064379, "res": {"No": 0.6772569743678786, "Yes": 0.3227426250064379}, "ground_truth": 0}, {"key": "37220221", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983482019225834, "res": {"Yes": 0.9983482019225834, "No": 0.0016517129137632548}, "ground_truth": 0}, {"key": "37220221", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9807308960513595, "res": {"Yes": 0.9807308960513595, "No": 0.01926904801914722}, "ground_truth": 1}, {"key": "37220221", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989956806665876, "res": {"Yes": 0.9989956806665876, "No": 0.0010042939717726021}, "ground_truth": 0}, {"key": "37220221", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998895490961502, "res": {"Yes": 0.9998895490961502, "No": 0.00011033217166795257}, "ground_truth": 0}, {"key": "38815218", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8722440154393603, "res": {"Yes": 0.8722440154393603, "No": 0.12775588300996574}, "ground_truth": 0}, {"key": "38815218", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9610886082961628, "res": {"Yes": 0.9610886082961628, "No": 0.03891110085673525}, "ground_truth": 0}, {"key": "38815218", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7143671022230259, "res": {"Yes": 0.7143671022230259, "No": 0.28563254555059514}, "ground_truth": 1}, {"key": "38815218", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7735536587586389, "res": {"Yes": 0.7735536587586389, "No": 0.2264459033255582}, "ground_truth": 0}, {"key": "38815218", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.011128772581106817, "res": {"No": 0.9888708661557156, "Yes": 0.011128772581106817}, "ground_truth": 0}, {"key": "39379109", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9420762395272454, "res": {"Yes": 0.9420762395272454, "No": 0.057923708788736136}, "ground_truth": 0}, {"key": "39379109", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999874092852638, "res": {"Yes": 0.9999874092852638, "No": 1.2469585981329048e-05}, "ground_truth": 0}, {"key": "39379109", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999858596579756, "res": {"Yes": 0.9999858596579756, "No": 1.4029742746172684e-05}, "ground_truth": 1}, {"key": "39379109", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999608275572601, "res": {"Yes": 0.9999608275572601, "No": 3.9087923522405014e-05}, "ground_truth": 0}, {"key": "39379109", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999471197682087, "res": {"Yes": 0.9999471197682087, "No": 5.273506255617657e-05}, "ground_truth": 0}, {"key": "14576125", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 4.587460293449034e-06}, "ground_truth": 0}, {"key": "14576125", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.7159082940883785e-06}, "ground_truth": 0}, {"key": "14576125", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.239343356149669e-07}, "ground_truth": 1}, {"key": "14576125", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.856037689538032e-07}, "ground_truth": 0}, {"key": "14576125", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.487361619034486e-06}, "ground_truth": 0}, {"key": "40814250", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999736672396801, "res": {"Yes": 0.999736672396801, "No": 0.00026322628407297356}, "ground_truth": 0}, {"key": "40814250", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.0212062820118817e-06}, "ground_truth": 0}, {"key": "40814250", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 7.741091853159261e-08}, "ground_truth": 1}, {"key": "40814250", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 2.5106551724230435e-08}, "ground_truth": 0}, {"key": "40814250", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.7180460900850046e-07}, "ground_truth": 0}, {"key": "36334488", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9968965140994877, "res": {"Yes": 0.9968965140994877, "No": 0.0031034961892474834}, "ground_truth": 0}, {"key": "36334488", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9803754543542191, "res": {"Yes": 0.9803754543542191, "No": 0.019624372310568954}, "ground_truth": 0}, {"key": "36334488", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998779876841082, "res": {"Yes": 0.9998779876841082, "No": 0.00012190177018090498}, "ground_truth": 1}, {"key": "36334488", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999332929750125, "res": {"Yes": 0.9999332929750125, "No": 6.664103693351038e-05}, "ground_truth": 0}, {"key": "36334488", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9849566361424554, "res": {"Yes": 0.9849566361424554, "No": 0.015043090442796097}, "ground_truth": 0}, {"key": "36888322", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8683815649129756, "res": {"Yes": 0.8683815649129756, "No": 0.13161814363857818}, "ground_truth": 0}, {"key": "36888322", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7357120082257098, "res": {"Yes": 0.7357120082257098, "No": 0.2642878576423195}, "ground_truth": 0}, {"key": "36888322", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8742028993353731, "res": {"Yes": 0.8742028993353731, "No": 0.1257968216673982}, "ground_truth": 1}, {"key": "36888322", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9385235031080982, "res": {"Yes": 0.9385235031080982, "No": 0.06147639493766678}, "ground_truth": 0}, {"key": "36888322", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9904825196552662, "res": {"Yes": 0.9904825196552662, "No": 0.009517341722508395}, "ground_truth": 0}, {"key": "37318916", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 5.8041632685162116e-05, "res": {"No": 0.9999417558982535, "Yes": 5.8041632685162116e-05}, "ground_truth": 0}, {"key": "37318916", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.00040332293071093, "res": {"No": 0.9995955072092758, "Yes": 0.00040332293071093}, "ground_truth": 1}, {"key": "37318916", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 3.277803885213811e-07, "res": {"No": 0.9999995679800934, "Yes": 3.277803885213811e-07}, "ground_truth": 0}, {"key": "37318916", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 3.177998378742917e-06, "res": {"No": 0.9999967070975216, "Yes": 3.177998378742917e-06}, "ground_truth": 0}, {"key": "39308700", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 5.191603717845504e-07, "res": {"No": 0.999999091165773, "Yes": 5.191603717845504e-07}, "ground_truth": 0}, {"key": "39308700", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.04855147517749615, "res": {"No": 0.9514483161577446, "Yes": 0.04855147517749615}, "ground_truth": 0}, {"key": "39308700", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994160403408493, "res": {"Yes": 0.9994160403408493, "No": 0.0005838769496447913}, "ground_truth": 1}, {"key": "39308700", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9328522041071217, "res": {"Yes": 0.9328522041071217, "No": 0.06714722380865541}, "ground_truth": 0}, {"key": "39308700", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.47201665877715726, "res": {"No": 0.5279826265914975, "Yes": 0.47201665877715726}, "ground_truth": 0}, {"key": "31061543", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0009435580205762896, "res": {"No": 0.9990563457819428, "Yes": 0.0009435580205762896}, "ground_truth": 0}, {"key": "31061543", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999924157887603, "res": {"Yes": 0.9999924157887603, "No": 7.520988343361616e-06}, "ground_truth": 0}, {"key": "31061543", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999545100307, "res": {"Yes": 0.9999545100307, "No": 4.5459533561875766e-05}, "ground_truth": 1}, {"key": "31061543", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.4712090679990952e-06}, "ground_truth": 0}, {"key": "31061543", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989069323341495, "res": {"Yes": 0.9989069323341495, "No": 0.0010929891463318851}, "ground_truth": 0}, {"key": "37380894", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.916181334932651, "res": {"Yes": 0.916181334932651, "No": 0.08381858651210866}, "ground_truth": 0}, {"key": "37380894", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999912237625114, "res": {"Yes": 0.9999912237625114, "No": 8.70796007035845e-06}, "ground_truth": 0}, {"key": "37380894", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0271167970931786e-07}, "ground_truth": 1}, {"key": "37380894", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.0153176893597113e-06}, "ground_truth": 0}, {"key": "37380894", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.435537937333362e-07}, "ground_truth": 0}, {"key": "38410139", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5510103422729627, "res": {"Yes": 0.5510103422729627, "No": 0.4489895973472161}, "ground_truth": 0}, {"key": "38410139", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999331737729718, "res": {"Yes": 0.9999331737729718, "No": 6.671224758067224e-05}, "ground_truth": 0}, {"key": "38410139", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999688139202959, "res": {"Yes": 0.9999688139202959, "No": 3.1073723482854496e-05}, "ground_truth": 1}, {"key": "38410139", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999827604126034, "res": {"Yes": 0.9999827604126034, "No": 1.7183791037606835e-05}, "ground_truth": 0}, {"key": "38410139", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914404701662594, "res": {"Yes": 0.9914404701662594, "No": 0.008559499159024382}, "ground_truth": 0}, {"key": "35953842", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.4274400297154248e-07, "res": {"No": 0.9999998063873687, "Yes": 1.4274400297154248e-07}, "ground_truth": 0}, {"key": "35953842", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956606204336138, "res": {"Yes": 0.9956606204336138, "No": 0.004339307091294573}, "ground_truth": 0}, {"key": "35953842", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999900317366834, "res": {"Yes": 0.9999900317366834, "No": 9.946239678330828e-06}, "ground_truth": 1}, {"key": "35953842", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996856789392184, "res": {"Yes": 0.9996856789392184, "No": 0.00031427913562328153}, "ground_truth": 0}, {"key": "35953842", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.986227262092864, "res": {"Yes": 0.986227262092864, "No": 0.013772602311013193}, "ground_truth": 0}, {"key": "39815663", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.18521721639399255, "res": {"No": 0.8147824857603436, "Yes": 0.18521721639399255}, "ground_truth": 0}, {"key": "39815663", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999610659559496, "res": {"Yes": 0.9999610659559496, "No": 3.882442496444719e-05}, "ground_truth": 0}, {"key": "39815663", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979041617425155, "res": {"Yes": 0.9979041617425155, "No": 0.002095831933787417}, "ground_truth": 1}, {"key": "39815663", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995452395349003, "res": {"Yes": 0.9995452395349003, "No": 0.0004547097293806057}, "ground_truth": 0}, {"key": "39815663", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965402846548899, "res": {"Yes": 0.9965402846548899, "No": 0.0034596702428121074}, "ground_truth": 0}, {"key": "35121432", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.868920641083684, "res": {"Yes": 0.868920641083684, "No": 0.13107885373027361}, "ground_truth": 0}, {"key": "35121432", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996836531082333, "res": {"Yes": 0.9996836531082333, "No": 0.00031628084154678186}, "ground_truth": 0}, {"key": "35121432", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987210576447628, "res": {"Yes": 0.9987210576447628, "No": 0.0012788687255543233}, "ground_truth": 1}, {"key": "35121432", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984858090456715, "res": {"Yes": 0.9984858090456715, "No": 0.0015140849740703755}, "ground_truth": 0}, {"key": "35121432", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9643978712582362, "res": {"Yes": 0.9643978712582362, "No": 0.0356010267588262}, "ground_truth": 0}, {"key": "21712310", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9933287755835809, "res": {"Yes": 0.9933287755835809, "No": 0.006671254872123189}, "ground_truth": 0}, {"key": "21712310", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999711292653989, "res": {"Yes": 0.999711292653989, "No": 0.00028866347347385315}, "ground_truth": 0}, {"key": "21712310", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995136763936793, "res": {"Yes": 0.9995136763936793, "No": 0.00048622664258513856}, "ground_truth": 1}, {"key": "21712310", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999523644646081, "res": {"Yes": 0.9999523644646081, "No": 4.756320974920453e-05}, "ground_truth": 0}, {"key": "21712310", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999054015547151, "res": {"Yes": 0.9999054015547151, "No": 9.452965151834778e-05}, "ground_truth": 0}, {"key": "37952914", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9132134006932795, "res": {"Yes": 0.9132134006932795, "No": 0.08678641315850233}, "ground_truth": 0}, {"key": "37952914", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966180632744299, "res": {"Yes": 0.9966180632744299, "No": 0.0033819189293781368}, "ground_truth": 0}, {"key": "37952914", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999129107124922, "res": {"Yes": 0.9999129107124922, "No": 8.706587864986751e-05}, "ground_truth": 1}, {"key": "37952914", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995585806741905, "res": {"Yes": 0.9995585806741905, "No": 0.00044134984398193426}, "ground_truth": 0}, {"key": "37952914", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962195558399731, "res": {"Yes": 0.9962195558399731, "No": 0.0037803109909377566}, "ground_truth": 0}, {"key": "38956779", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999151753838112, "res": {"Yes": 0.9999151753838112, "No": 8.474667885696513e-05}, "ground_truth": 0}, {"key": "38956779", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999266180325883, "res": {"Yes": 0.9999266180325883, "No": 7.329596487802287e-05}, "ground_truth": 1}, {"key": "38956779", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999918197754583, "res": {"Yes": 0.9999918197754583, "No": 8.050784643834738e-06}, "ground_truth": 0}, {"key": "38956779", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999484309397297, "res": {"Yes": 0.9999484309397297, "No": 5.1505034386223465e-05}, "ground_truth": 0}, {"key": "36101833", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4392379435940966, "res": {"No": 0.5607618911560842, "Yes": 0.4392379435940966}, "ground_truth": 0}, {"key": "36101833", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999817094909176, "res": {"Yes": 0.999817094909176, "No": 0.00018282118755478822}, "ground_truth": 0}, {"key": "36101833", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992028406005102, "res": {"Yes": 0.9992028406005102, "No": 0.0007970918827977096}, "ground_truth": 1}, {"key": "36101833", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965206277926824, "res": {"Yes": 0.9965206277926824, "No": 0.0034793088891159228}, "ground_truth": 0}, {"key": "36101833", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.27752179731432475, "res": {"No": 0.7224777177178905, "Yes": 0.27752179731432475}, "ground_truth": 0}, {"key": "35544662", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.016634931289468982, "res": {"No": 0.9833647772138353, "Yes": 0.016634931289468982}, "ground_truth": 0}, {"key": "35544662", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985669879437401, "res": {"Yes": 0.9985669879437401, "No": 0.0014327766558917942}, "ground_truth": 0}, {"key": "35544662", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8570672603286679, "res": {"Yes": 0.8570672603286679, "No": 0.1429322839054522}, "ground_truth": 1}, {"key": "35544662", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999892767243843, "res": {"Yes": 0.999892767243843, "No": 0.00010715812614031168}, "ground_truth": 0}, {"key": "35544662", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8486719526313787, "res": {"Yes": 0.8486719526313787, "No": 0.15132727552206243}, "ground_truth": 0}, {"key": "39759044", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9990019849486219, "res": {"Yes": 0.9990019849486219, "No": 0.000997713197707599}, "ground_truth": 0}, {"key": "39759044", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996552993429273, "res": {"Yes": 0.9996552993429273, "No": 0.0003446352315926558}, "ground_truth": 0}, {"key": "39759044", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980372544092506, "res": {"Yes": 0.9980372544092506, "No": 0.001962613059560435}, "ground_truth": 1}, {"key": "39759044", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997402475715051, "res": {"Yes": 0.9997402475715051, "No": 0.00025960741769164735}, "ground_truth": 0}, {"key": "39759044", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999269008604483, "res": {"Yes": 0.999269008604483, "No": 0.0007309206164054747}, "ground_truth": 0}, {"key": "39433018", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.0006888881328313e-07, "res": {"No": 0.9999998063873687, "Yes": 1.0006888881328313e-07}, "ground_truth": 0}, {"key": "39433018", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993413937363784, "res": {"Yes": 0.9993413937363784, "No": 0.0006585105603049246}, "ground_truth": 0}, {"key": "39433018", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998987268084759, "res": {"Yes": 0.9998987268084759, "No": 0.00010115869870256939}, "ground_truth": 1}, {"key": "39433018", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999522452652937, "res": {"Yes": 0.9999522452652937, "No": 4.7721143419951655e-05}, "ground_truth": 0}, {"key": "39433018", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978538289255774, "res": {"Yes": 0.9978538289255774, "No": 0.0021460872039959556}, "ground_truth": 0}, {"key": "22111959", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998921712939328, "res": {"Yes": 0.9998921712939328, "No": 0.00010767716719912943}, "ground_truth": 0}, {"key": "22111959", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999939654258081, "res": {"Yes": 0.9999939654258081, "No": 5.972021966772333e-06}, "ground_truth": 0}, {"key": "22111959", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998754920517581, "res": {"Yes": 0.9998754920517581, "No": 0.00012439208563257178}, "ground_truth": 1}, {"key": "22111959", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999753699393249, "res": {"Yes": 0.9999753699393249, "No": 2.4551343013151505e-05}, "ground_truth": 0}, {"key": "22111959", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.99997334352929, "res": {"Yes": 0.99997334352929, "No": 2.6548295817532542e-05}, "ground_truth": 0}, {"key": "38210094", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9691695511604491, "res": {"Yes": 0.9691695511604491, "No": 0.030830321776981338}, "ground_truth": 0}, {"key": "38210094", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999589203757908, "res": {"Yes": 0.9999589203757908, "No": 4.098776296817994e-05}, "ground_truth": 0}, {"key": "38210094", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.6997672294100817e-06}, "ground_truth": 1}, {"key": "38210094", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998254376677772, "res": {"Yes": 0.9998254376677772, "No": 0.00017451392850400632}, "ground_truth": 0}, {"key": "38210094", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.95176288765056e-07}, "ground_truth": 0}, {"key": "37675935", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.012973259519589535, "res": {"No": 0.9870264297943839, "Yes": 0.012973259519589535}, "ground_truth": 0}, {"key": "37675935", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.12046305145506465, "res": {"No": 0.879536864843165, "Yes": 0.12046305145506465}, "ground_truth": 0}, {"key": "37675935", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985302587291516, "res": {"Yes": 0.9985302587291516, "No": 0.0014696902715759066}, "ground_truth": 1}, {"key": "37675935", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985014982766018, "res": {"Yes": 0.9985014982766018, "No": 0.0014985144923746019}, "ground_truth": 0}, {"key": "37675935", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995344042933366, "res": {"Yes": 0.9995344042933366, "No": 0.00046556771713553933}, "ground_truth": 0}, {"key": "35732604", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9975626394985112, "res": {"Yes": 0.9975626394985112, "No": 0.002437152327834835}, "ground_truth": 0}, {"key": "35732604", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6994201810255453, "res": {"Yes": 0.6994201810255453, "No": 0.30057826790558345}, "ground_truth": 0}, {"key": "35732604", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999639931190339, "res": {"Yes": 0.999639931190339, "No": 0.00036003151092408334}, "ground_truth": 1}, {"key": "35732604", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7801267909568071, "res": {"Yes": 0.7801267909568071, "No": 0.21987116891475736}, "ground_truth": 0}, {"key": "35732604", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.04228917228697965, "res": {"No": 0.9577101731787674, "Yes": 0.04228917228697965}, "ground_truth": 0}, {"key": "27453212", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998972965103645, "res": {"Yes": 0.9998972965103645, "No": 0.00010257062215508995}, "ground_truth": 0}, {"key": "27453212", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999940846288958, "res": {"Yes": 0.9999940846288958, "No": 5.853451913851207e-06}, "ground_truth": 0}, {"key": "27453212", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999902701413353, "res": {"Yes": 0.9999902701413353, "No": 9.62486214972001e-06}, "ground_truth": 1}, {"key": "27453212", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999843100330889, "res": {"Yes": 0.9999843100330889, "No": 1.5622193413682616e-05}, "ground_truth": 0}, {"key": "27453212", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999069510593213, "res": {"Yes": 0.9999069510593213, "No": 9.301060863244898e-05}, "ground_truth": 0}, {"key": "39910047", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 8.997002487987958e-05, "res": {"No": 0.9999099308804668, "Yes": 8.997002487987958e-05}, "ground_truth": 0}, {"key": "39910047", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9652001478988238, "res": {"Yes": 0.9652001478988238, "No": 0.03479982224907341}, "ground_truth": 0}, {"key": "39910047", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9368690603486952, "res": {"Yes": 0.9368690603486952, "No": 0.06313083247571107}, "ground_truth": 1}, {"key": "39910047", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979942687725882, "res": {"Yes": 0.9979942687725882, "No": 0.0020056682138626628}, "ground_truth": 0}, {"key": "39910047", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.058653853659316385, "res": {"No": 0.941346087580007, "Yes": 0.058653853659316385}, "ground_truth": 0}, {"key": "40054265", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.6434579989912453e-06}, "ground_truth": 0}, {"key": "40054265", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999765619466755, "res": {"Yes": 0.9999765619466755, "No": 2.3345717392165896e-05}, "ground_truth": 0}, {"key": "40054265", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.2202563250613026e-07}, "ground_truth": 1}, {"key": "40054265", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999928926002577, "res": {"Yes": 0.9999928926002577, "No": 6.973795035135228e-06}, "ground_truth": 0}, {"key": "40054265", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999915813694369, "res": {"Yes": 0.9999915813694369, "No": 8.338952074732468e-06}, "ground_truth": 0}, {"key": "19984615", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0693929015390266, "res": {"No": 0.9306049876306864, "Yes": 0.0693929015390266}, "ground_truth": 0}, {"key": "19984615", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.001705592024620679, "res": {"No": 0.9982934403317713, "Yes": 0.001705592024620679}, "ground_truth": 0}, {"key": "19984615", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0022204997407169313, "res": {"No": 0.9977791746897066, "Yes": 0.0022204997407169313}, "ground_truth": 1}, {"key": "19984615", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.06807935288341013, "res": {"No": 0.931920330729721, "Yes": 0.06807935288341013}, "ground_truth": 0}, {"key": "19984615", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0005238647592444062, "res": {"No": 0.9994759372688684, "Yes": 0.0005238647592444062}, "ground_truth": 0}, {"key": "16490806", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.997157088747502, "res": {"Yes": 0.997157088747502, "No": 0.002842886129649118}, "ground_truth": 0}, {"key": "16490806", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996511286997496, "res": {"Yes": 0.9996511286997496, "No": 0.0003487627382666387}, "ground_truth": 0}, {"key": "16490806", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.62383596840394e-07}, "ground_truth": 1}, {"key": "16490806", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.841580506162246e-07}, "ground_truth": 0}, {"key": "16490806", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999359152835132, "res": {"Yes": 0.9999359152835132, "No": 6.398831944072631e-05}, "ground_truth": 0}, {"key": "36396237", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9479827481942823, "res": {"Yes": 0.9479827481942823, "No": 0.05201712956158168}, "ground_truth": 0}, {"key": "36396237", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8017635019115023, "res": {"Yes": 0.8017635019115023, "No": 0.19823626541190054}, "ground_truth": 0}, {"key": "36396237", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999931310055916, "res": {"Yes": 0.9999931310055916, "No": 6.840942797065569e-06}, "ground_truth": 1}, {"key": "36396237", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999504572972803, "res": {"Yes": 0.9999504572972803, "No": 4.94012416238609e-05}, "ground_truth": 0}, {"key": "36396237", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999224462174501, "res": {"Yes": 0.9999224462174501, "No": 7.748540204939788e-05}, "ground_truth": 0}, {"key": "40726444", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0038894236277625164, "res": {"No": 0.9961102393938636, "Yes": 0.0038894236277625164}, "ground_truth": 0}, {"key": "40726444", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9549543477950426, "res": {"Yes": 0.9549543477950426, "No": 0.045045544546834036}, "ground_truth": 0}, {"key": "40726444", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985883811724799, "res": {"Yes": 0.9985883811724799, "No": 0.001411551958653435}, "ground_truth": 1}, {"key": "40726444", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998151916591657, "res": {"Yes": 0.9998151916591657, "No": 0.0001847692146547459}, "ground_truth": 0}, {"key": "40726444", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.15653081303176777, "res": {"No": 0.8434678691632062, "Yes": 0.15653081303176777}, "ground_truth": 0}, {"key": "37314826", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999772771531678, "res": {"Yes": 0.9999772771531678, "No": 2.2392353925176118e-05}, "ground_truth": 0}, {"key": "37314826", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999845484373766, "res": {"Yes": 0.9999845484373766, "No": 1.5428347405726374e-05}, "ground_truth": 0}, {"key": "37314826", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.1730064096728443e-07}, "ground_truth": 1}, {"key": "37314826", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.926516154170492e-06}, "ground_truth": 0}, {"key": "37314826", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999847868417213, "res": {"Yes": 0.9999847868417213, "No": 1.5132769499824725e-05}, "ground_truth": 0}, {"key": "38506971", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.04287362259826809, "res": {"No": 0.9571262275603548, "Yes": 0.04287362259826809}, "ground_truth": 0}, {"key": "38506971", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.997762916708957, "res": {"Yes": 0.997762916708957, "No": 0.002237042495516293}, "ground_truth": 0}, {"key": "38506971", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999031369065324, "res": {"Yes": 0.9999031369065324, "No": 9.681138083336589e-05}, "ground_truth": 1}, {"key": "38506971", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995339277054509, "res": {"Yes": 0.9995339277054509, "No": 0.00046601161465354123}, "ground_truth": 0}, {"key": "38506971", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971525902828745, "res": {"Yes": 0.9971525902828745, "No": 0.0028473128190282205}, "ground_truth": 0}, {"key": "40699312", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999367496503877, "res": {"Yes": 0.9999367496503877, "No": 6.32277039007526e-05}, "ground_truth": 0}, {"key": "40699312", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992024832356782, "res": {"Yes": 0.9992024832356782, "No": 0.0007975082328784513}, "ground_truth": 0}, {"key": "40699312", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.594554049143258e-07}, "ground_truth": 1}, {"key": "40699312", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99998752848777, "res": {"Yes": 0.99998752848777, "No": 1.2410173229268711e-05}, "ground_truth": 0}, {"key": "40699312", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993194896522671, "res": {"Yes": 0.9993194896522671, "No": 0.0006803950313580651}, "ground_truth": 0}, {"key": "34695474", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9770193173757196, "res": {"Yes": 0.9770193173757196, "No": 0.022980533292971825}, "ground_truth": 0}, {"key": "34695474", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999657086808173, "res": {"Yes": 0.999657086808173, "No": 0.0003427793542030362}, "ground_truth": 0}, {"key": "34695474", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999495037169802, "res": {"Yes": 0.9999495037169802, "No": 5.039382047636773e-05}, "ground_truth": 1}, {"key": "34695474", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986995390331915, "res": {"Yes": 0.9986995390331915, "No": 0.001300412964447319}, "ground_truth": 0}, {"key": "34695474", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.987459930242251, "res": {"Yes": 0.987459930242251, "No": 0.012539582973902516}, "ground_truth": 0}, {"key": "36281498", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.14571590632248574, "res": {"No": 0.8542834599079434, "Yes": 0.14571590632248574}, "ground_truth": 0}, {"key": "36281498", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999726283256111, "res": {"Yes": 0.9999726283256111, "No": 2.7337559376058927e-05}, "ground_truth": 0}, {"key": "36281498", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.692754259167864e-06}, "ground_truth": 1}, {"key": "36281498", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.670478243049408e-06}, "ground_truth": 0}, {"key": "36281498", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.0909459222489672e-06}, "ground_truth": 0}, {"key": "39558652", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9965080801821943, "res": {"Yes": 0.9965080801821943, "No": 0.0034919358909771864}, "ground_truth": 0}, {"key": "39558652", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.993556787092703, "res": {"Yes": 0.993556787092703, "No": 0.006443130851092255}, "ground_truth": 0}, {"key": "39558652", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999771579518836, "res": {"Yes": 0.9999771579518836, "No": 2.2756175473919124e-05}, "ground_truth": 1}, {"key": "39558652", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997186811775627, "res": {"Yes": 0.9997186811775627, "No": 0.0002812774525900631}, "ground_truth": 0}, {"key": "39558652", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 7.936263384193358e-06}, "ground_truth": 0}, {"key": "37330579", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.006455767252577072, "res": {"No": 0.9935441875809081, "Yes": 0.006455767252577072}, "ground_truth": 0}, {"key": "37330579", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9472363114681822, "res": {"Yes": 0.9472363114681822, "No": 0.052763463867480466}, "ground_truth": 1}, {"key": "37330579", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9905714095450098, "res": {"Yes": 0.9905714095450098, "No": 0.00942846475442169}, "ground_truth": 0}, {"key": "37330579", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.895567762793255, "res": {"Yes": 0.895567762793255, "No": 0.10443197254371081}, "ground_truth": 0}, {"key": "40547658", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9242806497153043, "res": {"Yes": 0.9242806497153043, "No": 0.07571865618190228}, "ground_truth": 0}, {"key": "40547658", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8897861553626594, "res": {"Yes": 0.8897861553626594, "No": 0.11021348735070761}, "ground_truth": 0}, {"key": "40547658", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980932925805337, "res": {"Yes": 0.9980932925805337, "No": 0.0019066013878237904}, "ground_truth": 1}, {"key": "40547658", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9777334422827536, "res": {"Yes": 0.9777334422827536, "No": 0.022266069640934295}, "ground_truth": 0}, {"key": "40547658", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9435556956690461, "res": {"Yes": 0.9435556956690461, "No": 0.05644335557048693}, "ground_truth": 0}, {"key": "37119340", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.5762830222589548e-06, "res": {"No": 0.9999983759447187, "Yes": 1.5762830222589548e-06}, "ground_truth": 0}, {"key": "37119340", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998885955719342, "res": {"Yes": 0.9998885955719342, "No": 0.00011127933347364247}, "ground_truth": 0}, {"key": "37119340", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.034456877601794e-07}, "ground_truth": 1}, {"key": "37119340", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 4.103683799374956e-06}, "ground_truth": 0}, {"key": "37119340", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999884821053314, "res": {"Yes": 0.9999884821053314, "No": 1.1404791315818689e-05}, "ground_truth": 0}, {"key": "35301627", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0009142687431717518, "res": {"No": 0.9990856170431082, "Yes": 0.0009142687431717518}, "ground_truth": 0}, {"key": "35301627", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956807134659404, "res": {"Yes": 0.9956807134659404, "No": 0.0043189325593569776}, "ground_truth": 0}, {"key": "35301627", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999917005724405, "res": {"Yes": 0.9999917005724405, "No": 8.218564493286706e-06}, "ground_truth": 1}, {"key": "35301627", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9786399376053174, "res": {"Yes": 0.9786399376053174, "No": 0.021359954394102137}, "ground_truth": 0}, {"key": "35301627", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.991873638852323, "res": {"Yes": 0.991873638852323, "No": 0.008125995785992398}, "ground_truth": 0}, {"key": "34037168", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3148851216020857, "res": {"No": 0.6851145009617596, "Yes": 0.3148851216020857}, "ground_truth": 0}, {"key": "34037168", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7868954937837823, "res": {"Yes": 0.7868954937837823, "No": 0.2131029279880243}, "ground_truth": 0}, {"key": "34037168", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994031813071685, "res": {"Yes": 0.9994031813071685, "No": 0.000596189731569073}, "ground_truth": 1}, {"key": "34037168", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996277878481368, "res": {"Yes": 0.9996277878481368, "No": 0.0003720071810424095}, "ground_truth": 0}, {"key": "34037168", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6123303588916634, "res": {"Yes": 0.6123303588916634, "No": 0.3876649668030703}, "ground_truth": 0}, {"key": "39703862", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9938582449596638, "res": {"Yes": 0.9938582449596638, "No": 0.0061416674624722315}, "ground_truth": 0}, {"key": "39703862", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9683832641766328, "res": {"Yes": 0.9683832641766328, "No": 0.031616299007099655}, "ground_truth": 0}, {"key": "39703862", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9896610304187095, "res": {"Yes": 0.9896610304187095, "No": 0.010338644617225745}, "ground_truth": 1}, {"key": "39703862", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929628544881809, "res": {"Yes": 0.9929628544881809, "No": 0.007037044586557244}, "ground_truth": 0}, {"key": "39703862", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9849470348319632, "res": {"Yes": 0.9849470348319632, "No": 0.015052648849273868}, "ground_truth": 0}, {"key": "16554814", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0025397396581403123, "res": {"No": 0.9974580158273922, "Yes": 0.0025397396581403123}, "ground_truth": 0}, {"key": "16554814", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.5970420395870562, "res": {"Yes": 0.5970420395870562, "No": 0.40294612936616125}, "ground_truth": 0}, {"key": "16554814", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985863611300869, "res": {"Yes": 0.9985863611300869, "No": 0.0014135746076730038}, "ground_truth": 1}, {"key": "16554814", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4506516646950347, "res": {"No": 0.54934731798934, "Yes": 0.4506516646950347}, "ground_truth": 0}, {"key": "16554814", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986035958808205, "res": {"Yes": 0.9986035958808205, "No": 0.0013963094921021785}, "ground_truth": 0}, {"key": "32983099", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0012057068337352707, "res": {"No": 0.9987940662232532, "Yes": 0.0012057068337352707}, "ground_truth": 0}, {"key": "32983099", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 6.139133190235433e-06}, "ground_truth": 0}, {"key": "32983099", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.9722135826831963e-06}, "ground_truth": 1}, {"key": "32983099", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997478709598699, "res": {"Yes": 0.9997478709598699, "No": 0.00025204573470374164}, "ground_truth": 0}, {"key": "32983099", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999957132395842, "res": {"Yes": 0.999957132395842, "No": 4.2742406674808654e-05}, "ground_truth": 0}, {"key": "41072994", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9856232034682896, "res": {"Yes": 0.9856232034682896, "No": 0.014376540971882823}, "ground_truth": 0}, {"key": "41072994", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8905137460888745, "res": {"Yes": 0.8905137460888745, "No": 0.10948611953478496}, "ground_truth": 0}, {"key": "41072994", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999588238736947, "res": {"Yes": 0.999588238736947, "No": 0.0004116387556126719}, "ground_truth": 1}, {"key": "41072994", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9886021196687048, "res": {"Yes": 0.9886021196687048, "No": 0.011397759249077846}, "ground_truth": 0}, {"key": "41072994", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.02804627372125587, "res": {"No": 0.9719535312698989, "Yes": 0.02804627372125587}, "ground_truth": 0}, {"key": "38396247", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00012016425012816874, "res": {"No": 0.9998797755275411, "Yes": 0.00012016425012816874}, "ground_truth": 0}, {"key": "38396247", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998844239234294, "res": {"Yes": 0.9998844239234294, "No": 0.00011549919473708727}, "ground_truth": 0}, {"key": "38396247", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999746547351966, "res": {"Yes": 0.9999746547351966, "No": 2.529628527717669e-05}, "ground_truth": 1}, {"key": "38396247", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999748705179834, "res": {"Yes": 0.999748705179834, "No": 0.0002512240217391036}, "ground_truth": 0}, {"key": "38396247", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996148032573222, "res": {"Yes": 0.9996148032573222, "No": 0.0003851045508088164}, "ground_truth": 0}, {"key": "37507998", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 6.676637936729076e-06, "res": {"No": 0.9999932502087799, "Yes": 6.676637936729076e-06}, "ground_truth": 0}, {"key": "37507998", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.01728769676729372, "res": {"No": 0.9827120731468008, "Yes": 0.01728769676729372}, "ground_truth": 0}, {"key": "37507998", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0152674067596314, "res": {"No": 0.9847317607585351, "Yes": 0.0152674067596314}, "ground_truth": 1}, {"key": "37507998", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.022161084029517254, "res": {"No": 0.977838071225341, "Yes": 0.022161084029517254}, "ground_truth": 0}, {"key": "37507998", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.00019713397189142205, "res": {"No": 0.9998025584941401, "Yes": 0.00019713397189142205}, "ground_truth": 0}, {"key": "32593929", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996112285112276, "res": {"Yes": 0.9996112285112276, "No": 0.00038872826733302884}, "ground_truth": 0}, {"key": "32593929", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961348536815995, "res": {"Yes": 0.9961348536815995, "No": 0.0038651576834002155}, "ground_truth": 0}, {"key": "32593929", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996234981246263, "res": {"Yes": 0.9996234981246263, "No": 0.0003764811418600078}, "ground_truth": 1}, {"key": "32593929", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970594378472115, "res": {"Yes": 0.9970594378472115, "No": 0.0029405271822966037}, "ground_truth": 0}, {"key": "32593929", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942668908545435, "res": {"Yes": 0.9942668908545435, "No": 0.005732961732673143}, "ground_truth": 0}, {"key": "36056449", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999852636485811, "res": {"Yes": 0.9999852636485811, "No": 1.4616259273894924e-05}, "ground_truth": 0}, {"key": "36056449", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.181545789882487e-07}, "ground_truth": 0}, {"key": "36056449", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.0738144706458844e-07}, "ground_truth": 1}, {"key": "36056449", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.2274336888995891e-06}, "ground_truth": 0}, {"key": "36056449", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.25499288804234e-07}, "ground_truth": 0}, {"key": "21986185", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9540060422104276, "res": {"Yes": 0.9540060422104276, "No": 0.04599370222582754}, "ground_truth": 0}, {"key": "21986185", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.2882253299112974e-06}, "ground_truth": 0}, {"key": "21986185", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999985978860297, "res": {"Yes": 0.999985978860297, "No": 1.3982458195892461e-05}, "ground_truth": 1}, {"key": "21986185", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999120763555102, "res": {"Yes": 0.9999120763555102, "No": 8.781598608812522e-05}, "ground_truth": 0}, {"key": "21986185", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999931310055916, "res": {"Yes": 0.9999931310055916, "No": 6.742188381881307e-06}, "ground_truth": 0}, {"key": "40757465", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6484105943723586, "res": {"Yes": 0.6484105943723586, "No": 0.35158924583543283}, "ground_truth": 0}, {"key": "40757465", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.993635043303101, "res": {"Yes": 0.993635043303101, "No": 0.006364875546198391}, "ground_truth": 0}, {"key": "40757465", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997121268434309, "res": {"Yes": 0.9997121268434309, "No": 0.0002877446796559324}, "ground_truth": 1}, {"key": "40757465", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989118108077384, "res": {"Yes": 0.9989118108077384, "No": 0.0010881606338935875}, "ground_truth": 0}, {"key": "40757465", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942390771244447, "res": {"Yes": 0.9942390771244447, "No": 0.005760864665513554}, "ground_truth": 0}, {"key": "35198313", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999354384941838, "res": {"Yes": 0.9999354384941838, "No": 6.446526540949286e-05}, "ground_truth": 0}, {"key": "35198313", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.5671748784807053e-06}, "ground_truth": 0}, {"key": "35198313", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.4921184506032295e-06}, "ground_truth": 1}, {"key": "35198313", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.4292167371692978e-06}, "ground_truth": 0}, {"key": "35198313", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999800187796273, "res": {"Yes": 0.9999800187796273, "No": 1.988943244890459e-05}, "ground_truth": 0}, {"key": "30604618", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8836146161338898, "res": {"Yes": 0.8836146161338898, "No": 0.11638530469261787}, "ground_truth": 0}, {"key": "30604618", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.918440566276869e-07}, "ground_truth": 0}, {"key": "30604618", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.5029436989563053e-07}, "ground_truth": 1}, {"key": "30604618", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.504898813764146e-07}, "ground_truth": 0}, {"key": "30604618", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.139494102362793e-07}, "ground_truth": 0}, {"key": "35779006", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9951386736820413, "res": {"Yes": 0.9951386736820413, "No": 0.004861211948686924}, "ground_truth": 0}, {"key": "35779006", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999615427524989, "res": {"Yes": 0.9999615427524989, "No": 3.8395280867431235e-05}, "ground_truth": 0}, {"key": "35779006", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999918197754583, "res": {"Yes": 0.9999918197754583, "No": 8.06326497276736e-06}, "ground_truth": 1}, {"key": "35779006", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.5174579870959466e-06}, "ground_truth": 0}, {"key": "35779006", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.9810919243154666e-07}, "ground_truth": 0}, {"key": "33858956", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999700059218314, "res": {"Yes": 0.9999700059218314, "No": 2.9929588049298056e-05}, "ground_truth": 0}, {"key": "33858956", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992571152755936, "res": {"Yes": 0.9992571152755936, "No": 0.000742761916495503}, "ground_truth": 0}, {"key": "33858956", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999913429644723, "res": {"Yes": 0.9999913429644723, "No": 8.564455507609768e-06}, "ground_truth": 1}, {"key": "33858956", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.9047572220510149e-06}, "ground_truth": 0}, {"key": "33858956", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 3.1316892578616658e-06}, "ground_truth": 0}, {"key": "38633880", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0003019892914478607, "res": {"No": 0.9996979494664503, "Yes": 0.0003019892914478607}, "ground_truth": 0}, {"key": "38633880", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999074278310677, "res": {"Yes": 0.9999074278310677, "No": 9.249166736907989e-05}, "ground_truth": 0}, {"key": "38633880", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999839524287637, "res": {"Yes": 0.9999839524287637, "No": 1.595079319302014e-05}, "ground_truth": 1}, {"key": "38633880", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998622621767251, "res": {"Yes": 0.9998622621767251, "No": 0.00013761267801120746}, "ground_truth": 0}, {"key": "38633880", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957815332335557, "res": {"Yes": 0.9957815332335557, "No": 0.004218266573055197}, "ground_truth": 0}, {"key": "36654905", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9950070586779641, "res": {"Yes": 0.9950070586779641, "No": 0.0049928140015505565}, "ground_truth": 0}, {"key": "36654905", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982450850671379, "res": {"Yes": 0.9982450850671379, "No": 0.0017549217674665961}, "ground_truth": 0}, {"key": "36654905", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9717430979270094, "res": {"Yes": 0.9717430979270094, "No": 0.02825678424374709}, "ground_truth": 1}, {"key": "36654905", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979656537929583, "res": {"Yes": 0.9979656537929583, "No": 0.0020343518745106213}, "ground_truth": 0}, {"key": "36654905", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8839726285522946, "res": {"Yes": 0.8839726285522946, "No": 0.11602719218022378}, "ground_truth": 0}, {"key": "26547482", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0001751226451637388, "res": {"No": 0.9998247225628852, "Yes": 0.0001751226451637388}, "ground_truth": 0}, {"key": "26547482", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969921214582671, "res": {"Yes": 0.9969921214582671, "No": 0.003007842002616396}, "ground_truth": 0}, {"key": "26547482", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984806983091377, "res": {"Yes": 0.9984806983091377, "No": 0.001519226431979785}, "ground_truth": 1}, {"key": "26547482", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994398564390128, "res": {"Yes": 0.9994398564390128, "No": 0.0005600633811581336}, "ground_truth": 0}, {"key": "26547482", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998495092248729, "res": {"Yes": 0.9998495092248729, "No": 0.0001503929533909403}, "ground_truth": 0}, {"key": "36439068", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9817107214827164, "res": {"Yes": 0.9817107214827164, "No": 0.01828926506000285}, "ground_truth": 0}, {"key": "36439068", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.004536656463655248, "res": {"No": 0.9954631880641245, "Yes": 0.004536656463655248}, "ground_truth": 0}, {"key": "36439068", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977639834178614, "res": {"Yes": 0.9977639834178614, "No": 0.00223594415366666}, "ground_truth": 1}, {"key": "36439068", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999366304579342, "res": {"Yes": 0.9999366304579342, "No": 6.327931876313549e-05}, "ground_truth": 0}, {"key": "36439068", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965060680351102, "res": {"Yes": 0.9965060680351102, "No": 0.0034938744532979635}, "ground_truth": 0}, {"key": "30501258", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7668664597571239, "res": {"Yes": 0.7668664597571239, "No": 0.2331319212538989}, "ground_truth": 0}, {"key": "30501258", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9947435831621014, "res": {"Yes": 0.9947435831621014, "No": 0.005256420485254616}, "ground_truth": 0}, {"key": "30501258", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994953501385975, "res": {"Yes": 0.9994953501385975, "No": 0.0005044574816088621}, "ground_truth": 1}, {"key": "30501258", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9962539819970038, "res": {"Yes": 0.9962539819970038, "No": 0.0037458302162488156}, "ground_truth": 0}, {"key": "30501258", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8510253605257324, "res": {"Yes": 0.8510253605257324, "No": 0.14897259740258054}, "ground_truth": 0}, {"key": "37560941", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.006680585136402429, "res": {"No": 0.9933191244480827, "Yes": 0.006680585136402429}, "ground_truth": 0}, {"key": "37560941", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.991255604576134, "res": {"Yes": 0.991255604576134, "No": 0.008744287928115909}, "ground_truth": 0}, {"key": "37560941", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999776347571058, "res": {"Yes": 0.9999776347571058, "No": 2.2253828427412358e-05}, "ground_truth": 1}, {"key": "37560941", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.636220840664107e-07}, "ground_truth": 0}, {"key": "37560941", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991812968204792, "res": {"Yes": 0.9991812968204792, "No": 0.000818654843048483}, "ground_truth": 0}, {"key": "36801665", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8423266875702856, "res": {"Yes": 0.8423266875702856, "No": 0.15767311207903476}, "ground_truth": 0}, {"key": "36801665", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994273544840648, "res": {"Yes": 0.9994273544840648, "No": 0.0005725159286233774}, "ground_truth": 0}, {"key": "36801665", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9377392534654643, "res": {"Yes": 0.9377392534654643, "No": 0.06226037443081562}, "ground_truth": 1}, {"key": "36801665", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6049057610159541, "res": {"Yes": 0.6049057610159541, "No": 0.3950939477326051}, "ground_truth": 0}, {"key": "36801665", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9157325755707563, "res": {"Yes": 0.9157325755707563, "No": 0.08426728226872968}, "ground_truth": 0}, {"key": "34954610", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.012717821518581756, "res": {"No": 0.9872821106461758, "Yes": 0.012717821518581756}, "ground_truth": 0}, {"key": "34954610", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.181957823555134e-06}, "ground_truth": 0}, {"key": "34954610", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999529604543941, "res": {"Yes": 0.9999529604543941, "No": 4.697058515239951e-05}, "ground_truth": 1}, {"key": "34954610", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998113778613928, "res": {"Yes": 0.9998113778613928, "No": 0.0001884277337339618}, "ground_truth": 0}, {"key": "34954610", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999051631772958, "res": {"Yes": 0.9999051631772958, "No": 9.472420441371261e-05}, "ground_truth": 0}, {"key": "37020510", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997434615416192, "res": {"Yes": 0.9997434615416192, "No": 0.00025650048564561953}, "ground_truth": 0}, {"key": "37020510", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981873580234188, "res": {"Yes": 0.9981873580234188, "No": 0.001812555773722352}, "ground_truth": 0}, {"key": "37020510", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984082069485974, "res": {"Yes": 0.9984082069485974, "No": 0.0015917653430590649}, "ground_truth": 1}, {"key": "37020510", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998125696772581, "res": {"Yes": 0.9998125696772581, "No": 0.00018740153446745723}, "ground_truth": 0}, {"key": "37020510", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999238765575363, "res": {"Yes": 0.9999238765575363, "No": 7.608366255859732e-05}, "ground_truth": 0}, {"key": "38064637", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9993380581802147, "res": {"Yes": 0.9993380581802147, "No": 0.0006618451114645586}, "ground_truth": 0}, {"key": "38064637", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.3107162882555375e-06}, "ground_truth": 0}, {"key": "38064637", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.10557474249774e-07}, "ground_truth": 1}, {"key": "38064637", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.2409069926969906e-06}, "ground_truth": 0}, {"key": "38064637", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992170036517487, "res": {"Yes": 0.9992170036517487, "No": 0.0007829270763212641}, "ground_truth": 0}, {"key": "40886108", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 6.571811154947361e-05, "res": {"No": 0.999934127339699, "Yes": 6.571811154947361e-05}, "ground_truth": 0}, {"key": "40886108", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997397708854714, "res": {"Yes": 0.9997397708854714, "No": 0.00026021795230797806}, "ground_truth": 0}, {"key": "40886108", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993054405681485, "res": {"Yes": 0.9993054405681485, "No": 0.0006944669396490912}, "ground_truth": 1}, {"key": "40886108", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998891915297121, "res": {"Yes": 0.9998891915297121, "No": 0.00011067406804805}, "ground_truth": 0}, {"key": "40886108", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999362728606604, "res": {"Yes": 0.9999362728606604, "No": 6.365357932859579e-05}, "ground_truth": 0}, {"key": "38554603", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9414841649363278, "res": {"Yes": 0.9414841649363278, "No": 0.058515839541524256}, "ground_truth": 0}, {"key": "38554603", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999107652316588, "res": {"Yes": 0.9999107652316588, "No": 8.920825302111752e-05}, "ground_truth": 0}, {"key": "38554603", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999962734742367, "res": {"Yes": 0.999962734742367, "No": 3.7171024926891016e-05}, "ground_truth": 1}, {"key": "38554603", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999773963544663, "res": {"Yes": 0.9999773963544663, "No": 2.251200129651658e-05}, "ground_truth": 0}, {"key": "38554603", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997390558518481, "res": {"Yes": 0.9997390558518481, "No": 0.00026090348716615756}, "ground_truth": 0}, {"key": "39115586", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.11850537035291071, "res": {"No": 0.8814944087903551, "Yes": 0.11850537035291071}, "ground_truth": 0}, {"key": "39115586", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999108844260287, "res": {"Yes": 0.9999108844260287, "No": 8.904541833762549e-05}, "ground_truth": 0}, {"key": "39115586", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.9847253819908514e-06}, "ground_truth": 1}, {"key": "39115586", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.1215592948249343e-07}, "ground_truth": 0}, {"key": "39115586", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999218502438513, "res": {"Yes": 0.9999218502438513, "No": 7.804125194406067e-05}, "ground_truth": 0}, {"key": "38786314", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7976142694584463, "res": {"Yes": 0.7976142694584463, "No": 0.20238442421288227}, "ground_truth": 0}, {"key": "38786314", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996229023192102, "res": {"Yes": 0.9996229023192102, "No": 0.00037700843377896486}, "ground_truth": 0}, {"key": "38786314", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998910985741908, "res": {"Yes": 0.9998910985741908, "No": 0.00010876162469076732}, "ground_truth": 1}, {"key": "38786314", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999130299021183, "res": {"Yes": 0.9999130299021183, "No": 8.683237690755056e-05}, "ground_truth": 0}, {"key": "38786314", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993638939512897, "res": {"Yes": 0.9993638939512897, "No": 0.0006359754474548937}, "ground_truth": 0}, {"key": "38721078", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4782935565273246, "res": {"No": 0.5217061085969548, "Yes": 0.4782935565273246}, "ground_truth": 0}, {"key": "38721078", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999490269261713, "res": {"Yes": 0.9999490269261713, "No": 5.0839446438944913e-05}, "ground_truth": 0}, {"key": "38721078", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999268564151225, "res": {"Yes": 0.9999268564151225, "No": 7.310556978952623e-05}, "ground_truth": 1}, {"key": "38721078", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999624963432797, "res": {"Yes": 0.9999624963432797, "No": 3.740231339408274e-05}, "ground_truth": 0}, {"key": "38721078", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.007770288385813685, "res": {"No": 0.9922296058195788, "Yes": 0.007770288385813685}, "ground_truth": 0}, {"key": "39475467", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9939299669424428, "res": {"Yes": 0.9939299669424428, "No": 0.006069973740694931}, "ground_truth": 0}, {"key": "39475467", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997697881628126, "res": {"Yes": 0.9997697881628126, "No": 0.00023009046506593234}, "ground_truth": 0}, {"key": "39475467", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996804356715607, "res": {"Yes": 0.9996804356715607, "No": 0.00031950200779117416}, "ground_truth": 1}, {"key": "39475467", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 4.0921952334653145e-06}, "ground_truth": 0}, {"key": "39475467", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.658349013593932e-06}, "ground_truth": 0}, {"key": "35691234", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.6480972900515567e-05, "res": {"No": 0.9999829988145218, "Yes": 1.6480972900515567e-05}, "ground_truth": 0}, {"key": "35691234", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999560596094073, "res": {"Yes": 0.9999560596094073, "No": 4.386894023582897e-05}, "ground_truth": 0}, {"key": "35691234", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999985978860297, "res": {"Yes": 0.999985978860297, "No": 1.3979368414064639e-05}, "ground_truth": 1}, {"key": "35691234", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999924157887603, "res": {"Yes": 0.9999924157887603, "No": 7.537560269419891e-06}, "ground_truth": 0}, {"key": "35691234", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998853774396677, "res": {"Yes": 0.9998853774396677, "No": 0.00011453798995501249}, "ground_truth": 0}, {"key": "36871390", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.060584051325006306, "res": {"No": 0.9394156105051568, "Yes": 0.060584051325006306}, "ground_truth": 0}, {"key": "36871390", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6450253568586186, "res": {"Yes": 0.6450253568586186, "No": 0.3549742952055457}, "ground_truth": 0}, {"key": "36871390", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998162643014713, "res": {"Yes": 0.9998162643014713, "No": 0.0001836071751874209}, "ground_truth": 1}, {"key": "36871390", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998247225628852, "res": {"Yes": 0.9998247225628852, "No": 0.00017518132107753544}, "ground_truth": 0}, {"key": "36871390", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.011844845321554042, "res": {"No": 0.9881547044902784, "Yes": 0.011844845321554042}, "ground_truth": 0}, {"key": "31730844", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6812785878967533, "res": {"Yes": 0.6812785878967533, "No": 0.31872108511146635}, "ground_truth": 0}, {"key": "31730844", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993978203329961, "res": {"Yes": 0.9993978203329961, "No": 0.0006020121079403439}, "ground_truth": 0}, {"key": "31730844", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995816888964966, "res": {"Yes": 0.9995816888964966, "No": 0.00041822541519179077}, "ground_truth": 1}, {"key": "31730844", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975816107266428, "res": {"Yes": 0.9975816107266428, "No": 0.0024183059611328295}, "ground_truth": 0}, {"key": "31730844", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.560616168078107, "res": {"Yes": 0.560616168078107, "No": 0.4393835443526389}, "ground_truth": 0}, {"key": "30810940", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6140974561861584, "res": {"Yes": 0.6140974561861584, "No": 0.38590243520592543}, "ground_truth": 0}, {"key": "30810940", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.49826351266804053, "res": {"No": 0.5017348731206587, "Yes": 0.49826351266804053}, "ground_truth": 0}, {"key": "30810940", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978213038114019, "res": {"Yes": 0.9978213038114019, "No": 0.0021787157092069327}, "ground_truth": 1}, {"key": "30810940", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946292940883037, "res": {"Yes": 0.9946292940883037, "No": 0.005370715172996427}, "ground_truth": 0}, {"key": "30810940", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999740587314805, "res": {"Yes": 0.9999740587314805, "No": 2.589843540906199e-05}, "ground_truth": 0}, {"key": "39352003", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5244545546448868, "res": {"Yes": 0.5244545546448868, "No": 0.47554533965839285}, "ground_truth": 0}, {"key": "39352003", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7801282419939878, "res": {"Yes": 0.7801282419939878, "No": 0.21987158667037515}, "ground_truth": 0}, {"key": "39352003", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933808782362662, "res": {"Yes": 0.9933808782362662, "No": 0.006619054791285158}, "ground_truth": 1}, {"key": "39352003", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898857168917513, "res": {"Yes": 0.9898857168917513, "No": 0.010114128209157039}, "ground_truth": 0}, {"key": "39352003", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971371819015225, "res": {"Yes": 0.9971371819015225, "No": 0.0028627459282300513}, "ground_truth": 0}, {"key": "40118123", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.6604531681336404e-06, "res": {"No": 0.9999982567412194, "Yes": 1.6604531681336404e-06}, "ground_truth": 0}, {"key": "40118123", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999815683978641, "res": {"Yes": 0.9999815683978641, "No": 1.83520733636845e-05}, "ground_truth": 0}, {"key": "40118123", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999770387506135, "res": {"Yes": 0.9999770387506135, "No": 2.289671746083932e-05}, "ground_truth": 1}, {"key": "40118123", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.6871024957870503e-06}, "ground_truth": 0}, {"key": "40118123", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948345397102923, "res": {"Yes": 0.9948345397102923, "No": 0.005165402247322999}, "ground_truth": 0}, {"key": "37114191", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9991246515411832, "res": {"Yes": 0.9991246515411832, "No": 0.0008753186206526743}, "ground_truth": 0}, {"key": "37114191", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999939654258081, "res": {"Yes": 0.9999939654258081, "No": 5.972296686101308e-06}, "ground_truth": 0}, {"key": "37114191", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0746893584736069e-07}, "ground_truth": 1}, {"key": "37114191", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 4.427060164234675e-08}, "ground_truth": 0}, {"key": "37114191", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 6.246439684515539e-07}, "ground_truth": 0}, {"key": "39268203", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.16761677608011702, "res": {"No": 0.8323823320502228, "Yes": 0.16761677608011702}, "ground_truth": 0}, {"key": "39268203", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9399129447264324, "res": {"Yes": 0.9399129447264324, "No": 0.060086625478591685}, "ground_truth": 1}, {"key": "39268203", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997153180697098, "res": {"Yes": 0.997153180697098, "No": 0.0028467493915743207}, "ground_truth": 0}, {"key": "39268203", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.47232288447181064, "res": {"No": 0.5276765909928931, "Yes": 0.47232288447181064}, "ground_truth": 0}, {"key": "37950968", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.13205124658944342, "res": {"No": 0.8679483072106259, "Yes": 0.13205124658944342}, "ground_truth": 0}, {"key": "37950968", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9861857575919035, "res": {"Yes": 0.9861857575919035, "No": 0.013814106483078726}, "ground_truth": 0}, {"key": "37950968", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984066673044887, "res": {"Yes": 0.9984066673044887, "No": 0.001593068895640172}, "ground_truth": 1}, {"key": "37950968", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9552455367529876, "res": {"Yes": 0.9552455367529876, "No": 0.04475367565913866}, "ground_truth": 0}, {"key": "37950968", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.3333664978724238, "res": {"No": 0.6666328595959937, "Yes": 0.3333664978724238}, "ground_truth": 0}, {"key": "34959807", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999801379802525, "res": {"Yes": 0.9999801379802525, "No": 1.9823310792508994e-05}, "ground_truth": 0}, {"key": "34959807", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.7904717036697163e-07}, "ground_truth": 0}, {"key": "34959807", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.5218178252582258e-06}, "ground_truth": 1}, {"key": "34959807", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.243737116642122e-07}, "ground_truth": 0}, {"key": "34959807", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.088945565047549e-07}, "ground_truth": 0}, {"key": "35631314", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 5.691527417018599e-07, "res": {"No": 0.9999993295729247, "Yes": 5.691527417018599e-07}, "ground_truth": 0}, {"key": "35631314", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999845484373766, "res": {"Yes": 0.9999845484373766, "No": 1.5317279312358862e-05}, "ground_truth": 0}, {"key": "35631314", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.396343804210944e-07}, "ground_truth": 1}, {"key": "35631314", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999940846288958, "res": {"Yes": 0.9999940846288958, "No": 5.889714697081634e-06}, "ground_truth": 0}, {"key": "35631314", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.8046137102948772e-06}, "ground_truth": 0}, {"key": "38082365", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9941171377995117, "res": {"Yes": 0.9941171377995117, "No": 0.0058828467092533815}, "ground_truth": 0}, {"key": "38082365", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977623219426596, "res": {"Yes": 0.9977623219426596, "No": 0.0022376725353884814}, "ground_truth": 0}, {"key": "38082365", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996070579919407, "res": {"Yes": 0.9996070579919407, "No": 0.0003928333821453127}, "ground_truth": 1}, {"key": "38082365", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979543743498993, "res": {"Yes": 0.9979543743498993, "No": 0.002045590046108633}, "ground_truth": 0}, {"key": "38082365", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992978241809041, "res": {"Yes": 0.9992978241809041, "No": 0.0007021440328387481}, "ground_truth": 0}, {"key": "37242829", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9991870102553875, "res": {"Yes": 0.9991870102553875, "No": 0.0008128776107757616}, "ground_truth": 0}, {"key": "37242829", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999751315392253, "res": {"Yes": 0.9999751315392253, "No": 2.4714189638225717e-05}, "ground_truth": 0}, {"key": "37242829", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992089078085545, "res": {"Yes": 0.9992089078085545, "No": 0.00079103843577932}, "ground_truth": 1}, {"key": "37242829", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996731702604792, "res": {"Yes": 0.9996731702604792, "No": 0.0003267307894487794}, "ground_truth": 0}, {"key": "37242829", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977445255137177, "res": {"Yes": 0.9977445255137177, "No": 0.0022554246700747147}, "ground_truth": 0}, {"key": "38556068", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.10784909908045e-06, "res": {"No": 0.9999958726752174, "Yes": 4.10784909908045e-06}, "ground_truth": 0}, {"key": "38556068", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.593225292112631e-07}, "ground_truth": 0}, {"key": "38556068", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997971954076322, "res": {"Yes": 0.9997971954076322, "No": 0.00020269118365516818}, "ground_truth": 1}, {"key": "38556068", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 4.1152996521622434e-07}, "ground_truth": 0}, {"key": "38556068", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999716747231683, "res": {"Yes": 0.9999716747231683, "No": 2.8268003820015282e-05}, "ground_truth": 0}, {"key": "32969336", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.4716485165611307e-06, "res": {"No": 0.9999958726752174, "Yes": 3.4716485165611307e-06}, "ground_truth": 0}, {"key": "32969336", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.866857714628547e-07}, "ground_truth": 0}, {"key": "32969336", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969044506236078, "res": {"Yes": 0.9969044506236078, "No": 0.0030954685344178634}, "ground_truth": 1}, {"key": "32969336", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 5.067368037026405e-06}, "ground_truth": 0}, {"key": "32969336", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 6.224857546324045e-06}, "ground_truth": 0}, {"key": "36825153", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9982745486058038, "res": {"Yes": 0.9982745486058038, "No": 0.0017253807986854692}, "ground_truth": 0}, {"key": "36825153", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999822836059372, "res": {"Yes": 0.9999822836059372, "No": 1.760502826233721e-05}, "ground_truth": 0}, {"key": "36825153", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999975489140396, "res": {"Yes": 0.999975489140396, "No": 2.4374501318935096e-05}, "ground_truth": 1}, {"key": "36825153", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.5521127817054105e-06}, "ground_truth": 0}, {"key": "36825153", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999516774191217, "res": {"Yes": 0.999516774191217, "No": 0.00048315502165478016}, "ground_truth": 0}, {"key": "41050146", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9073879477684536, "res": {"Yes": 0.9073879477684536, "No": 0.09261175514282576}, "ground_truth": 0}, {"key": "41050146", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9943351298322867, "res": {"Yes": 0.9943351298322867, "No": 0.005664853190607108}, "ground_truth": 0}, {"key": "41050146", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.995246706127753, "res": {"Yes": 0.995246706127753, "No": 0.004753268938766195}, "ground_truth": 1}, {"key": "41050146", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9926010887128606, "res": {"Yes": 0.9926010887128606, "No": 0.007398840353785861}, "ground_truth": 0}, {"key": "41050146", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7410566691681856, "res": {"Yes": 0.7410566691681856, "No": 0.25894331558003947}, "ground_truth": 0}, {"key": "27865037", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8426201968898555, "res": {"Yes": 0.8426201968898555, "No": 0.15737948314530337}, "ground_truth": 0}, {"key": "27865037", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0173341738587737e-07}, "ground_truth": 0}, {"key": "27865037", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.992538707242934e-06}, "ground_truth": 1}, {"key": "27865037", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999925349918634, "res": {"Yes": 0.9999925349918634, "No": 7.434866008025185e-06}, "ground_truth": 0}, {"key": "27865037", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999622579442493, "res": {"Yes": 0.9999622579442493, "No": 3.767864762336e-05}, "ground_truth": 0}, {"key": "39868565", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9640188332461802, "res": {"Yes": 0.9640188332461802, "No": 0.03598098408299931}, "ground_truth": 0}, {"key": "39868565", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.982710000609224, "res": {"Yes": 0.982710000609224, "No": 0.01729002903489554}, "ground_truth": 0}, {"key": "39868565", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9902058937422171, "res": {"Yes": 0.9902058937422171, "No": 0.009794006128127416}, "ground_truth": 1}, {"key": "39868565", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9719293192370557, "res": {"Yes": 0.9719293192370557, "No": 0.02807040950522332}, "ground_truth": 0}, {"key": "39868565", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6171177503068527, "res": {"Yes": 0.6171177503068527, "No": 0.38288197368336263}, "ground_truth": 0}, {"key": "37761968", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5226053754366992, "res": {"Yes": 0.5226053754366992, "No": 0.47739393948242326}, "ground_truth": 0}, {"key": "37761968", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.05132681262966649, "res": {"No": 0.9486730673426853, "Yes": 0.05132681262966649}, "ground_truth": 0}, {"key": "37761968", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999442590335854, "res": {"Yes": 0.9999442590335854, "No": 5.5715955379217914e-05}, "ground_truth": 1}, {"key": "37761968", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.331065245290881e-06}, "ground_truth": 0}, {"key": "37761968", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979894023647996, "res": {"Yes": 0.9979894023647996, "No": 0.0020106223057059646}, "ground_truth": 0}, {"key": "16326139", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9876743122996617, "res": {"Yes": 0.9876743122996617, "No": 0.012325555365344971}, "ground_truth": 0}, {"key": "16326139", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9742162932510134, "res": {"Yes": 0.9742162932510134, "No": 0.025783544515971906}, "ground_truth": 0}, {"key": "16326139", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8832798523608693, "res": {"Yes": 0.8832798523608693, "No": 0.11671995036688562}, "ground_truth": 1}, {"key": "16326139", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2684391554064356, "res": {"No": 0.7315606427823761, "Yes": 0.2684391554064356}, "ground_truth": 0}, {"key": "16326139", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7992335204678633, "res": {"Yes": 0.7992335204678633, "No": 0.20076628673630087}, "ground_truth": 0}, {"key": "36568381", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998504627018165, "res": {"Yes": 0.9998504627018165, "No": 0.00014943231651334113}, "ground_truth": 0}, {"key": "36568381", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983044780242133, "res": {"Yes": 0.9983044780242133, "No": 0.0016954259021629773}, "ground_truth": 0}, {"key": "36568381", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997983871965933, "res": {"Yes": 0.9997983871965933, "No": 0.00020153973499765013}, "ground_truth": 1}, {"key": "36568381", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9749089928572803, "res": {"Yes": 0.9749089928572803, "No": 0.02509059510682702}, "ground_truth": 0}, {"key": "36568381", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998620238095902, "res": {"Yes": 0.9998620238095902, "No": 0.00013786053496770112}, "ground_truth": 0}, {"key": "39855613", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.45307742137030965, "res": {"No": 0.5469223733870535, "Yes": 0.45307742137030965}, "ground_truth": 0}, {"key": "39855613", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 7.10485290255279e-06}, "ground_truth": 0}, {"key": "39855613", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999639931190339, "res": {"Yes": 0.999639931190339, "No": 0.0003600356513102659}, "ground_truth": 1}, {"key": "39855613", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999803763825457, "res": {"Yes": 0.9999803763825457, "No": 1.9541151161679824e-05}, "ground_truth": 0}, {"key": "39855613", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965991141573681, "res": {"Yes": 0.9965991141573681, "No": 0.0034008587694594875}, "ground_truth": 0}, {"key": "29856302", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9957941797393329, "res": {"Yes": 0.9957941797393329, "No": 0.004205754383928901}, "ground_truth": 0}, {"key": "29856302", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9902613032534402, "res": {"Yes": 0.9902613032534402, "No": 0.009738593268927365}, "ground_truth": 0}, {"key": "29856302", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963168213038925, "res": {"Yes": 0.9963168213038925, "No": 0.003683069762275947}, "ground_truth": 1}, {"key": "29856302", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9950409282001711, "res": {"Yes": 0.9950409282001711, "No": 0.004959005809348625}, "ground_truth": 0}, {"key": "29856302", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993506818583229, "res": {"Yes": 0.9993506818583229, "No": 0.0006491835899400408}, "ground_truth": 0}, {"key": "35641106", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.25290043633031883, "res": {"No": 0.7470993305989849, "Yes": 0.25290043633031883}, "ground_truth": 0}, {"key": "35641106", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993628218842474, "res": {"Yes": 0.9993628218842474, "No": 0.0006370696294290369}, "ground_truth": 0}, {"key": "35641106", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991144237543004, "res": {"Yes": 0.9991144237543004, "No": 0.0008855465791815358}, "ground_truth": 1}, {"key": "35641106", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992603314397577, "res": {"Yes": 0.9992603314397577, "No": 0.0007395808138794447}, "ground_truth": 0}, {"key": "35641106", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999352000996042, "res": {"Yes": 0.9999352000996042, "No": 6.472429012123674e-05}, "ground_truth": 0}, {"key": "39474558", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997763428849825, "res": {"Yes": 0.9997763428849825, "No": 0.00022361315179716143}, "ground_truth": 0}, {"key": "39474558", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946560658906767, "res": {"Yes": 0.9946560658906767, "No": 0.005343884264897461}, "ground_truth": 0}, {"key": "39474558", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997216604436437, "res": {"Yes": 0.9997216604436437, "No": 0.0002782398629274074}, "ground_truth": 1}, {"key": "39474558", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9937657103314426, "res": {"Yes": 0.9937657103314426, "No": 0.006234283638721406}, "ground_truth": 0}, {"key": "39474558", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.02769621272128217, "res": {"No": 0.9723036628229121, "Yes": 0.02769621272128217}, "ground_truth": 0}, {"key": "34338135", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9994964224377642, "res": {"Yes": 0.9994964224377642, "No": 0.000503506467904691}, "ground_truth": 0}, {"key": "34338135", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998625005539155, "res": {"Yes": 0.9998625005539155, "No": 0.00013738298793464012}, "ground_truth": 0}, {"key": "34338135", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.7313324251861967e-06}, "ground_truth": 1}, {"key": "34338135", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998655994225669, "res": {"Yes": 0.9998655994225669, "No": 0.0001343125515171199}, "ground_truth": 0}, {"key": "34338135", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983656704734027, "res": {"Yes": 0.9983656704734027, "No": 0.0016342308719861352}, "ground_truth": 0}, {"key": "35870330", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999685755197594, "res": {"Yes": 0.9999685755197594, "No": 3.132999852749546e-05}, "ground_truth": 0}, {"key": "35870330", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.99969806864045, "res": {"Yes": 0.99969806864045, "No": 0.00030184648428956145}, "ground_truth": 0}, {"key": "35870330", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998870461047716, "res": {"Yes": 0.9998870461047716, "No": 0.00011291999487105908}, "ground_truth": 1}, {"key": "35870330", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995921708849111, "res": {"Yes": 0.9995921708849111, "No": 0.0004078206759123225}, "ground_truth": 0}, {"key": "35870330", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996707869425151, "res": {"Yes": 0.9996707869425151, "No": 0.0003291871908011295}, "ground_truth": 0}, {"key": "24478245", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 4.5497648848312774e-06}, "ground_truth": 0}, {"key": "24478245", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 3.284208101765774e-06}, "ground_truth": 0}, {"key": "24478245", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999655955278475, "res": {"Yes": 0.9999655955278475, "No": 3.426706051667791e-05}, "ground_truth": 1}, {"key": "24478245", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999939654258081, "res": {"Yes": 0.9999939654258081, "No": 5.9396223740669265e-06}, "ground_truth": 0}, {"key": "24478245", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999560596094073, "res": {"Yes": 0.9999560596094073, "No": 4.3912304167036585e-05}, "ground_truth": 0}, {"key": "38485946", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 5.383819453935294e-06, "res": {"No": 0.9999938462231346, "Yes": 5.383819453935294e-06}, "ground_truth": 0}, {"key": "38485946", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9939895623844107, "res": {"Yes": 0.9939895623844107, "No": 0.006009442480775281}, "ground_truth": 0}, {"key": "38485946", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.03547228470325194, "res": {"No": 0.9645240613308861, "Yes": 0.03547228470325194}, "ground_truth": 1}, {"key": "38485946", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985716157133125, "res": {"Yes": 0.9985716157133125, "No": 0.0014283352124506222}, "ground_truth": 0}, {"key": "38485946", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.23850242523624793, "res": {"No": 0.7614686547517722, "Yes": 0.23850242523624793}, "ground_truth": 0}, {"key": "32509613", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.009232217593569984, "res": {"No": 0.9907677364776684, "Yes": 0.009232217593569984}, "ground_truth": 0}, {"key": "32509613", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972534636513453, "res": {"Yes": 0.9972534636513453, "No": 0.0027465244581901635}, "ground_truth": 0}, {"key": "32509613", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998502243374946, "res": {"Yes": 0.9998502243374946, "No": 0.0001497443576713646}, "ground_truth": 1}, {"key": "32509613", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996431850628466, "res": {"Yes": 0.996431850628466, "No": 0.003568150088451944}, "ground_truth": 0}, {"key": "32509613", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.028446987828228312, "res": {"No": 0.9715528881165029, "Yes": 0.028446987828228312}, "ground_truth": 0}, {"key": "34078819", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.004737631271186055, "res": {"No": 0.9952623970124365, "Yes": 0.004737631271186055}, "ground_truth": 0}, {"key": "34078819", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 3.8819388947168557e-08}, "ground_truth": 0}, {"key": "34078819", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999980257181892, "res": {"Yes": 0.999980257181892, "No": 1.9606488511627517e-05}, "ground_truth": 1}, {"key": "34078819", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999534372470786, "res": {"Yes": 0.9999534372470786, "No": 4.6433471047682476e-05}, "ground_truth": 0}, {"key": "34078819", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 1.7979493113239957e-08}, "ground_truth": 0}, {"key": "39523865", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9823833873529518, "res": {"Yes": 0.9823833873529518, "No": 0.017616382253763184}, "ground_truth": 0}, {"key": "39523865", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999742971333243, "res": {"Yes": 0.9999742971333243, "No": 2.5632768640316236e-05}, "ground_truth": 0}, {"key": "39523865", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999717939237989, "res": {"Yes": 0.9999717939237989, "No": 2.8151159899002607e-05}, "ground_truth": 1}, {"key": "39523865", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.475849471555279e-06}, "ground_truth": 0}, {"key": "39523865", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.878950230834777e-06}, "ground_truth": 0}, {"key": "33146158", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.011796371438639259, "res": {"No": 0.988203359461266, "Yes": 0.011796371438639259}, "ground_truth": 0}, {"key": "33146158", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999774078524101, "res": {"Yes": 0.999774078524101, "No": 0.00022587947441391043}, "ground_truth": 0}, {"key": "33146158", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997852811953829, "res": {"Yes": 0.9997852811953829, "No": 0.00021465939399654832}, "ground_truth": 1}, {"key": "33146158", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999239957484695, "res": {"Yes": 0.9999239957484695, "No": 7.590773259727512e-05}, "ground_truth": 0}, {"key": "33146158", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999926022056503, "res": {"Yes": 0.999926022056503, "No": 7.395002727331705e-05}, "ground_truth": 0}, {"key": "32083974", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6701694688490656, "res": {"Yes": 0.6701694688490656, "No": 0.32982980499562076}, "ground_truth": 0}, {"key": "32083974", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983756526824624, "res": {"Yes": 0.9983756526824624, "No": 0.0016243091371012839}, "ground_truth": 0}, {"key": "32083974", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999735819309633, "res": {"Yes": 0.9999735819309633, "No": 2.6357570993898128e-05}, "ground_truth": 1}, {"key": "32083974", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.4163166001616014e-06}, "ground_truth": 0}, {"key": "32083974", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.998332872203969, "res": {"Yes": 0.998332872203969, "No": 0.0016670441347651072}, "ground_truth": 0}, {"key": "34378482", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9913995810301494, "res": {"Yes": 0.9913995810301494, "No": 0.008600189853310574}, "ground_truth": 0}, {"key": "34378482", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.5480914644668318, "res": {"Yes": 0.5480914644668318, "No": 0.4519084065772714}, "ground_truth": 0}, {"key": "34378482", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992771049146434, "res": {"Yes": 0.9992771049146434, "No": 0.0007228575366250592}, "ground_truth": 1}, {"key": "34378482", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987347377201411, "res": {"Yes": 0.9987347377201411, "No": 0.0012652701782035261}, "ground_truth": 0}, {"key": "34378482", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0010327418740321316, "res": {"No": 0.9989666539700978, "Yes": 0.0010327418740321316}, "ground_truth": 0}, {"key": "38080102", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00010585566951841224, "res": {"No": 0.9998940783500941, "Yes": 0.00010585566951841224}, "ground_truth": 0}, {"key": "38080102", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991552312581929, "res": {"Yes": 0.9991552312581929, "No": 0.0008447616849752667}, "ground_truth": 0}, {"key": "38080102", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984616805282405, "res": {"Yes": 0.9984616805282405, "No": 0.0015382550336908933}, "ground_truth": 1}, {"key": "38080102", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987874062866239, "res": {"Yes": 0.9987874062866239, "No": 0.0012124788240574888}, "ground_truth": 0}, {"key": "38080102", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.15267803824333115, "res": {"No": 0.8473216197942828, "Yes": 0.15267803824333115}, "ground_truth": 0}, {"key": "40244537", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.1227161361940955e-05, "res": {"No": 0.999978707566687, "Yes": 2.1227161361940955e-05}, "ground_truth": 0}, {"key": "40244537", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.1541092211045772, "res": {"No": 0.845890449565601, "Yes": 0.1541092211045772}, "ground_truth": 0}, {"key": "40244537", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999689611440499, "res": {"Yes": 0.999689611440499, "No": 0.00031035213059253}, "ground_truth": 1}, {"key": "40244537", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938608408214039, "res": {"Yes": 0.9938608408214039, "No": 0.006139178749021542}, "ground_truth": 0}, {"key": "40244537", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9618826396157514, "res": {"Yes": 0.9618826396157514, "No": 0.038117091562434716}, "ground_truth": 0}, {"key": "33497789", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4894917847961025, "res": {"No": 0.51050766336569, "Yes": 0.4894917847961025}, "ground_truth": 0}, {"key": "33497789", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9630430212371437, "res": {"Yes": 0.9630430212371437, "No": 0.03695662689636204}, "ground_truth": 0}, {"key": "33497789", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.08676282781679896, "res": {"No": 0.9132368705792641, "Yes": 0.08676282781679896}, "ground_truth": 1}, {"key": "33497789", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8624035607081043, "res": {"Yes": 0.8624035607081043, "No": 0.13759625093032096}, "ground_truth": 0}, {"key": "33497789", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9761785184956537, "res": {"Yes": 0.9761785184956537, "No": 0.02382146295153286}, "ground_truth": 0}, {"key": "28816889", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999697675220106, "res": {"Yes": 0.9999697675220106, "No": 3.0176140364389576e-05}, "ground_truth": 0}, {"key": "28816889", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998939591587275, "res": {"Yes": 0.9998939591587275, "No": 0.00010598902573646025}, "ground_truth": 0}, {"key": "28816889", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.7973055935342526e-07}, "ground_truth": 1}, {"key": "28816889", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.6119115627204144e-07}, "ground_truth": 0}, {"key": "28816889", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.762477503640767e-07}, "ground_truth": 0}, {"key": "38157127", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0009179450752647561, "res": {"No": 0.9990818169882272, "Yes": 0.0009179450752647561}, "ground_truth": 0}, {"key": "38157127", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9760217851185147, "res": {"Yes": 0.9760217851185147, "No": 0.02397808477908476}, "ground_truth": 1}, {"key": "38157127", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9940459841254287, "res": {"Yes": 0.9940459841254287, "No": 0.005953878041826518}, "ground_truth": 0}, {"key": "38157127", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0607216179602859, "res": {"No": 0.9392778158573539, "Yes": 0.0607216179602859}, "ground_truth": 0}, {"key": "36183569", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9981039786237659, "res": {"Yes": 0.9981039786237659, "No": 0.001895974167459069}, "ground_truth": 0}, {"key": "36183569", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998845431116596, "res": {"Yes": 0.9998845431116596, "No": 0.00011524526056653893}, "ground_truth": 0}, {"key": "36183569", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994098490476159, "res": {"Yes": 0.9994098490476159, "No": 0.0005899675633197354}, "ground_truth": 1}, {"key": "36183569", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991084688502581, "res": {"Yes": 0.9991084688502581, "No": 0.0008914145276669331}, "ground_truth": 0}, {"key": "36183569", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978125202286888, "res": {"Yes": 0.9978125202286888, "No": 0.002187464868446966}, "ground_truth": 0}, {"key": "36012016", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6367210699990173, "res": {"Yes": 0.6367210699990173, "No": 0.36327879834165755}, "ground_truth": 0}, {"key": "36012016", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999286443459358, "res": {"Yes": 0.9999286443459358, "No": 7.131064093174516e-05}, "ground_truth": 0}, {"key": "36012016", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999368688428554, "res": {"Yes": 0.9999368688428554, "No": 6.306727778654867e-05}, "ground_truth": 1}, {"key": "36012016", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998327078448934, "res": {"Yes": 0.9998327078448934, "No": 0.00016724161412062048}, "ground_truth": 0}, {"key": "36012016", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999133874850805, "res": {"Yes": 0.9999133874850805, "No": 8.646809984193077e-05}, "ground_truth": 0}, {"key": "34571973", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.29762686735722527, "res": {"No": 0.7023726467314654, "Yes": 0.29762686735722527}, "ground_truth": 0}, {"key": "34571973", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9363110681026361, "res": {"Yes": 0.9363110681026361, "No": 0.0636885806586169}, "ground_truth": 0}, {"key": "34571973", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9813944191143326, "res": {"Yes": 0.9813944191143326, "No": 0.018605463589975623}, "ground_truth": 1}, {"key": "34571973", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99980184341561, "res": {"Yes": 0.99980184341561, "No": 0.00019804584770670083}, "ground_truth": 0}, {"key": "34571973", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9766191487302074, "res": {"Yes": 0.9766191487302074, "No": 0.023380264731794224}, "ground_truth": 0}, {"key": "38707498", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9834451233650391, "res": {"Yes": 0.9834451233650391, "No": 0.0165548114062316}, "ground_truth": 0}, {"key": "38707498", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0005855464758639517, "res": {"No": 0.9994143724467939, "Yes": 0.0005855464758639517}, "ground_truth": 0}, {"key": "38707498", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9837779745591969, "res": {"Yes": 0.9837779745591969, "No": 0.016222031362574205}, "ground_truth": 1}, {"key": "38707498", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.0037243832921924815, "res": {"No": 0.9962755268200759, "Yes": 0.0037243832921924815}, "ground_truth": 0}, {"key": "38707498", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6050398471877063, "res": {"Yes": 0.6050398471877063, "No": 0.394959994088053}, "ground_truth": 0}, {"key": "35459082", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 5.254771694313379e-06}, "ground_truth": 0}, {"key": "35459082", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9945812951330189, "res": {"Yes": 0.9945812951330189, "No": 0.005418631425150559}, "ground_truth": 0}, {"key": "35459082", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995940773389053, "res": {"Yes": 0.9995940773389053, "No": 0.00040576840181662704}, "ground_truth": 1}, {"key": "35459082", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998748961061449, "res": {"Yes": 0.9998748961061449, "No": 0.00012501710375279338}, "ground_truth": 0}, {"key": "35459082", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998616662389972, "res": {"Yes": 0.9998616662389972, "No": 0.00013828662970463481}, "ground_truth": 0}, {"key": "39464041", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5202932937330826, "res": {"Yes": 0.5202932937330826, "No": 0.47970444706704796}, "ground_truth": 0}, {"key": "39464041", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986939474299958, "res": {"Yes": 0.9986939474299958, "No": 0.0013058984600025484}, "ground_truth": 0}, {"key": "39464041", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999671451237333, "res": {"Yes": 0.9999671451237333, "No": 3.277832775943615e-05}, "ground_truth": 1}, {"key": "39464041", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999183936139823, "res": {"Yes": 0.9999183936139823, "No": 8.152237789562036e-05}, "ground_truth": 0}, {"key": "39464041", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987115482676127, "res": {"Yes": 0.9987115482676127, "No": 0.001288316437739448}, "ground_truth": 0}, {"key": "23782052", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9311187768335406, "res": {"Yes": 0.9311187768335406, "No": 0.0688798635393953}, "ground_truth": 0}, {"key": "23782052", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999907070258236, "res": {"Yes": 0.999907070258236, "No": 9.282041065062692e-05}, "ground_truth": 0}, {"key": "23782052", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994523586103162, "res": {"Yes": 0.9994523586103162, "No": 0.0005475890834563829}, "ground_truth": 1}, {"key": "23782052", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999546292272846, "res": {"Yes": 0.9999546292272846, "No": 4.527298776088753e-05}, "ground_truth": 0}, {"key": "23782052", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987749271152148, "res": {"Yes": 0.9987749271152148, "No": 0.0012249699082267766}, "ground_truth": 0}, {"key": "36568455", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998445071102727, "res": {"Yes": 0.9998445071102727, "No": 0.00015540607937168668}, "ground_truth": 0}, {"key": "36568455", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999534372470786, "res": {"Yes": 0.9999534372470786, "No": 4.6466404052025696e-05}, "ground_truth": 0}, {"key": "36568455", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.301215225902141e-07}, "ground_truth": 1}, {"key": "36568455", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.8088523039856258e-06}, "ground_truth": 0}, {"key": "36568455", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999901509395023, "res": {"Yes": 0.9999901509395023, "No": 9.738611567587342e-06}, "ground_truth": 0}, {"key": "38469552", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.797777396727815e-08, "res": {"No": 0.9999998063873687, "Yes": 7.797777396727815e-08}, "ground_truth": 0}, {"key": "38469552", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998945551097033, "res": {"Yes": 0.9998945551097033, "No": 0.00010539314098691733}, "ground_truth": 0}, {"key": "38469552", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999943230348141, "res": {"Yes": 0.9999943230348141, "No": 5.637634473655059e-06}, "ground_truth": 1}, {"key": "38469552", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995586998615626, "res": {"Yes": 0.9995586998615626, "No": 0.0004412910159687053}, "ground_truth": 0}, {"key": "38469552", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7053800100797495, "res": {"Yes": 0.7053800100797495, "No": 0.2946198787919854}, "ground_truth": 0}, {"key": "35922277", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.36701766447551737, "res": {"No": 0.6329822805764418, "Yes": 0.36701766447551737}, "ground_truth": 0}, {"key": "35922277", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961941227789877, "res": {"Yes": 0.9961941227789877, "No": 0.0038057366723816387}, "ground_truth": 0}, {"key": "35922277", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9638850222595441, "res": {"Yes": 0.9638850222595441, "No": 0.03611486953405155}, "ground_truth": 1}, {"key": "35922277", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995375021551165, "res": {"Yes": 0.9995375021551165, "No": 0.00046239747558315453}, "ground_truth": 0}, {"key": "35922277", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998810866107576, "res": {"Yes": 0.9998810866107576, "No": 0.00011884410092659063}, "ground_truth": 0}, {"key": "32744293", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7338049510100314, "res": {"Yes": 0.7338049510100314, "No": 0.2661947221105462}, "ground_truth": 0}, {"key": "32744293", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9792645188952768, "res": {"Yes": 0.9792645188952768, "No": 0.020735439281512575}, "ground_truth": 0}, {"key": "32744293", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9458393071036715, "res": {"Yes": 0.9458393071036715, "No": 0.05416039809794192}, "ground_truth": 1}, {"key": "32744293", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8745983732484137, "res": {"Yes": 0.8745983732484137, "No": 0.12540133561534056}, "ground_truth": 0}, {"key": "32744293", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.01901229507290967, "res": {"No": 0.9809874554610626, "Yes": 0.01901229507290967}, "ground_truth": 0}, {"key": "30972362", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9610446779399135, "res": {"Yes": 0.9610446779399135, "No": 0.038955212118316845}, "ground_truth": 0}, {"key": "30972362", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9890586883677192, "res": {"Yes": 0.9890586883677192, "No": 0.010941274387172156}, "ground_truth": 0}, {"key": "30972362", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985018553408015, "res": {"Yes": 0.9985018553408015, "No": 0.0014981035062186716}, "ground_truth": 1}, {"key": "30972362", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983426195743847, "res": {"Yes": 0.9983426195743847, "No": 0.001657377574782775}, "ground_truth": 0}, {"key": "30972362", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978807757333548, "res": {"Yes": 0.9978807757333548, "No": 0.0021192339498695314}, "ground_truth": 0}, {"key": "36380943", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.09041367960210918, "res": {"No": 0.9095857744637096, "Yes": 0.09041367960210918}, "ground_truth": 0}, {"key": "36380943", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999893165220688, "res": {"Yes": 0.9999893165220688, "No": 1.0613911623518596e-05}, "ground_truth": 0}, {"key": "36380943", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999690523188893, "res": {"Yes": 0.9999690523188893, "No": 3.0890094246577337e-05}, "ground_truth": 1}, {"key": "36380943", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995950305922557, "res": {"Yes": 0.9995950305922557, "No": 0.0004049306257397747}, "ground_truth": 0}, {"key": "36380943", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973454708101187, "res": {"Yes": 0.9973454708101187, "No": 0.0026542177512457778}, "ground_truth": 0}, {"key": "36929355", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.991699392871586, "res": {"Yes": 0.991699392871586, "No": 0.008300504358013853}, "ground_truth": 0}, {"key": "36929355", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999853828508316, "res": {"Yes": 0.9999853828508316, "No": 1.455876995321076e-05}, "ground_truth": 0}, {"key": "36929355", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 6.1638190307380714e-06}, "ground_truth": 1}, {"key": "36929355", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984014342694969, "res": {"Yes": 0.9984014342694969, "No": 0.0015985824678860269}, "ground_truth": 0}, {"key": "36929355", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999031369065324, "res": {"Yes": 0.9999031369065324, "No": 9.672603084035087e-05}, "ground_truth": 0}, {"key": "39127206", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5309062643344418, "res": {"Yes": 0.5309062643344418, "No": 0.4690900615874274}, "ground_truth": 0}, {"key": "39127206", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0016266607257414262, "res": {"No": 0.9983729191336677, "Yes": 0.0016266607257414262}, "ground_truth": 0}, {"key": "39127206", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.23433473367592614, "res": {"No": 0.7656621683584666, "Yes": 0.23433473367592614}, "ground_truth": 1}, {"key": "39127206", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.026620134857665463, "res": {"No": 0.973375366071793, "Yes": 0.026620134857665463}, "ground_truth": 0}, {"key": "39127206", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0008084657033061969, "res": {"No": 0.9991880822137628, "Yes": 0.0008084657033061969}, "ground_truth": 0}, {"key": "36128318", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9969629740220789, "res": {"Yes": 0.9969629740220789, "No": 0.0030368757677345683}, "ground_truth": 0}, {"key": "36128318", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989890193855977, "res": {"Yes": 0.9989890193855977, "No": 0.00101089449647751}, "ground_truth": 0}, {"key": "36128318", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993623453282058, "res": {"Yes": 0.9993623453282058, "No": 0.0006375482484196741}, "ground_truth": 1}, {"key": "36128318", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992556859092605, "res": {"Yes": 0.9992556859092605, "No": 0.0007441562736195628}, "ground_truth": 0}, {"key": "36128318", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993665147665507, "res": {"Yes": 0.9993665147665507, "No": 0.0006333525118930156}, "ground_truth": 0}, {"key": "39863480", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9761251337284489, "res": {"Yes": 0.9761251337284489, "No": 0.023874651251238475}, "ground_truth": 0}, {"key": "39863480", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971176173645276, "res": {"Yes": 0.9971176173645276, "No": 0.0028823241896409643}, "ground_truth": 0}, {"key": "39863480", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997257085847705, "res": {"Yes": 0.9997257085847705, "No": 0.0002742281866958901}, "ground_truth": 1}, {"key": "39863480", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998397397030535, "res": {"Yes": 0.9998397397030535, "No": 0.00016015507393811896}, "ground_truth": 0}, {"key": "39863480", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7542638790389259, "res": {"Yes": 0.7542638790389259, "No": 0.24573548150778793}, "ground_truth": 0}, {"key": "38634057", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.061837272093579e-05, "res": {"No": 0.999979303571174, "Yes": 2.061837272093579e-05}, "ground_truth": 0}, {"key": "38634057", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7831543860105463, "res": {"Yes": 0.7831543860105463, "No": 0.21684536134637378}, "ground_truth": 0}, {"key": "38634057", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46160162196750454, "res": {"No": 0.5383982774735872, "Yes": 0.46160162196750454}, "ground_truth": 1}, {"key": "38634057", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9902112646335506, "res": {"Yes": 0.9902112646335506, "No": 0.009788641461954318}, "ground_truth": 0}, {"key": "38634057", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.38038841635383874, "res": {"No": 0.6196115063511055, "Yes": 0.38038841635383874}, "ground_truth": 0}, {"key": "33131935", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0616107814634641, "res": {"No": 0.9383890129419908, "Yes": 0.0616107814634641}, "ground_truth": 0}, {"key": "33131935", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9744087920766915, "res": {"Yes": 0.9744087920766915, "No": 0.025591167667898596}, "ground_truth": 0}, {"key": "33131935", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937201476156542, "res": {"Yes": 0.9937201476156542, "No": 0.006279753191442752}, "ground_truth": 1}, {"key": "33131935", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.04465611620346615, "res": {"No": 0.9553436598375653, "Yes": 0.04465611620346615}, "ground_truth": 0}, {"key": "33131935", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8703279276672438, "res": {"Yes": 0.8703279276672438, "No": 0.12967197325803964}, "ground_truth": 0}, {"key": "39021319", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9977067854433055, "res": {"Yes": 0.9977067854433055, "No": 0.002293197837353002}, "ground_truth": 0}, {"key": "39021319", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999956342685299, "res": {"Yes": 0.9999956342685299, "No": 4.252234280958487e-06}, "ground_truth": 0}, {"key": "39021319", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.913850053580703e-06}, "ground_truth": 1}, {"key": "39021319", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 4.064019701052165e-06}, "ground_truth": 0}, {"key": "39021319", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999896741293122, "res": {"Yes": 0.9999896741293122, "No": 1.0295990606659227e-05}, "ground_truth": 0}, {"key": "40644571", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9602495090781925, "res": {"Yes": 0.9602495090781925, "No": 0.039750363232460985}, "ground_truth": 0}, {"key": "40644571", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976290685085685, "res": {"Yes": 0.9976290685085685, "No": 0.0023709216916654122}, "ground_truth": 0}, {"key": "40644571", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998714396345921, "res": {"Yes": 0.9998714396345921, "No": 0.0001285229590421235}, "ground_truth": 1}, {"key": "40644571", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.3166525359063086e-06}, "ground_truth": 0}, {"key": "40644571", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999480941227638, "res": {"Yes": 0.999480941227638, "No": 0.0005190117683537842}, "ground_truth": 0}, {"key": "14681877", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998229348378865, "res": {"Yes": 0.9998229348378865, "No": 0.00017695749419872005}, "ground_truth": 0}, {"key": "14681877", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999808531883025, "res": {"Yes": 0.9999808531883025, "No": 1.9054733682291754e-05}, "ground_truth": 0}, {"key": "14681877", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999924157887603, "res": {"Yes": 0.9999924157887603, "No": 7.472312762331541e-06}, "ground_truth": 1}, {"key": "14681877", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.6241963166127218e-06}, "ground_truth": 0}, {"key": "14681877", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999276907844373, "res": {"Yes": 0.9999276907844373, "No": 7.221202317210244e-05}, "ground_truth": 0}, {"key": "36570890", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999518876724351, "res": {"Yes": 0.9999518876724351, "No": 4.809395809850528e-05}, "ground_truth": 0}, {"key": "36570890", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9387055098464789, "res": {"Yes": 0.9387055098464789, "No": 0.06129441190397059}, "ground_truth": 0}, {"key": "36570890", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994511672038477, "res": {"Yes": 0.9994511672038477, "No": 0.0005487827029486558}, "ground_truth": 1}, {"key": "36570890", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991270335171394, "res": {"Yes": 0.9991270335171394, "No": 0.0008728900107841874}, "ground_truth": 0}, {"key": "36570890", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.1463243420957241, "res": {"No": 0.8536753326657521, "Yes": 0.1463243420957241}, "ground_truth": 0}, {"key": "30452755", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.37049833432688556, "res": {"No": 0.6295012750521156, "Yes": 0.37049833432688556}, "ground_truth": 0}, {"key": "30452755", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999400871468467, "res": {"Yes": 0.9999400871468467, "No": 5.9789230810870504e-05}, "ground_truth": 0}, {"key": "30452755", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999461661890916, "res": {"Yes": 0.9999461661890916, "No": 5.379069828905247e-05}, "ground_truth": 1}, {"key": "30452755", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992416414692925, "res": {"Yes": 0.9992416414692925, "No": 0.0007581691834978368}, "ground_truth": 0}, {"key": "30452755", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998169757209873, "res": {"Yes": 0.9998169757209873, "No": 0.00018289305040162151}, "ground_truth": 0}, {"key": "37347053", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6129709708745233, "res": {"Yes": 0.6129709708745233, "No": 0.38702828529717787}, "ground_truth": 0}, {"key": "37347053", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.99980184341561, "res": {"Yes": 0.99980184341561, "No": 0.00019803376727844742}, "ground_truth": 0}, {"key": "37347053", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999180360292298, "res": {"Yes": 0.9999180360292298, "No": 8.183618931722312e-05}, "ground_truth": 1}, {"key": "37347053", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985770876011872, "res": {"Yes": 0.9985770876011872, "No": 0.0014227964684582375}, "ground_truth": 0}, {"key": "37347053", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.998743890167214, "res": {"Yes": 0.998743890167214, "No": 0.0012560341774007773}, "ground_truth": 0}, {"key": "38890979", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.4131361405946855e-06}, "ground_truth": 0}, {"key": "38890979", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996517244820033, "res": {"Yes": 0.9996517244820033, "No": 0.00034815144015181495}, "ground_truth": 1}, {"key": "38890979", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999597547668612, "res": {"Yes": 0.9999597547668612, "No": 4.020163063911084e-05}, "ground_truth": 0}, {"key": "38890979", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998666721189521, "res": {"Yes": 0.9998666721189521, "No": 0.00013328471546927145}, "ground_truth": 0}, {"key": "32974694", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8421400654994766, "res": {"Yes": 0.8421400654994766, "No": 0.15785962345728796}, "ground_truth": 0}, {"key": "32974694", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993482993491763, "res": {"Yes": 0.9993482993491763, "No": 0.0006516357615949394}, "ground_truth": 0}, {"key": "32974694", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999891973193493, "res": {"Yes": 0.9999891973193493, "No": 1.070099318260278e-05}, "ground_truth": 1}, {"key": "32974694", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997070062310602, "res": {"Yes": 0.9997070062310602, "No": 0.00029286962905454675}, "ground_truth": 0}, {"key": "32974694", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997925474212627, "res": {"Yes": 0.9997925474212627, "No": 0.00020739321028495993}, "ground_truth": 0}, {"key": "38519940", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 5.919699283847385e-07, "res": {"No": 0.9999992103693117, "Yes": 5.919699283847385e-07}, "ground_truth": 0}, {"key": "38519940", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.99883509892464, "res": {"Yes": 0.99883509892464, "No": 0.0011648231570620402}, "ground_truth": 0}, {"key": "38519940", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990385042735225, "res": {"Yes": 0.9990385042735225, "No": 0.000961413462101995}, "ground_truth": 1}, {"key": "38519940", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998594016848696, "res": {"Yes": 0.9998594016848696, "No": 0.0001405053488230736}, "ground_truth": 0}, {"key": "38519940", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989854506035994, "res": {"Yes": 0.9989854506035994, "No": 0.0010144872160600786}, "ground_truth": 0}, {"key": "38870104", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0005270521805490148, "res": {"No": 0.9994728432459723, "Yes": 0.0005270521805490148}, "ground_truth": 0}, {"key": "38870104", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0016321028558635214, "res": {"No": 0.9983675711637754, "Yes": 0.0016321028558635214}, "ground_truth": 1}, {"key": "38870104", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.011006935553552965, "res": {"No": 0.9889926906834816, "Yes": 0.011006935553552965}, "ground_truth": 0}, {"key": "38870104", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.005801226025855352, "res": {"No": 0.99419854123285, "Yes": 0.005801226025855352}, "ground_truth": 0}, {"key": "34283161", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.721310032972298, "res": {"Yes": 0.721310032972298, "No": 0.2786897842977752}, "ground_truth": 0}, {"key": "34283161", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995716873820494, "res": {"Yes": 0.9995716873820494, "No": 0.00042818967271501985}, "ground_truth": 0}, {"key": "34283161", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991160801375207, "res": {"Yes": 0.9991160801375207, "No": 0.000883850170567495}, "ground_truth": 1}, {"key": "34283161", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993456785616978, "res": {"Yes": 0.9993456785616978, "No": 0.0006541469091533858}, "ground_truth": 0}, {"key": "34283161", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999860980626328, "res": {"Yes": 0.9999860980626328, "No": 1.3786813032655672e-05}, "ground_truth": 0}, {"key": "31650463", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9585236814693776, "res": {"Yes": 0.9585236814693776, "No": 0.04147622116248816}, "ground_truth": 0}, {"key": "31650463", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7663469304197464, "res": {"Yes": 0.7663469304197464, "No": 0.2336530631331903}, "ground_truth": 0}, {"key": "31650463", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999634499379698, "res": {"Yes": 0.9999634499379698, "No": 3.6442532142235185e-05}, "ground_truth": 1}, {"key": "31650463", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8403475270717111, "res": {"Yes": 0.8403475270717111, "No": 0.15965246817955664}, "ground_truth": 0}, {"key": "31650463", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9927949117062783, "res": {"Yes": 0.9927949117062783, "No": 0.007204985946882051}, "ground_truth": 0}, {"key": "35589432", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.03505917958768512, "res": {"No": 0.964940680998899, "Yes": 0.03505917958768512}, "ground_truth": 0}, {"key": "35589432", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9782090101212597, "res": {"Yes": 0.9782090101212597, "No": 0.02179092239078028}, "ground_truth": 0}, {"key": "35589432", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.38790311660911825, "res": {"No": 0.6120967994140494, "Yes": 0.38790311660911825}, "ground_truth": 1}, {"key": "35589432", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.642270239538107, "res": {"Yes": 0.642270239538107, "No": 0.357729558944181}, "ground_truth": 0}, {"key": "35589432", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.155607946265104, "res": {"No": 0.8443915163625487, "Yes": 0.155607946265104}, "ground_truth": 0}, {"key": "14412752", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9973352721072143, "res": {"Yes": 0.9973352721072143, "No": 0.002664720779564361}, "ground_truth": 0}, {"key": "14412752", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985301398042048, "res": {"Yes": 0.9985301398042048, "No": 0.0014697852166341426}, "ground_truth": 0}, {"key": "14412752", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9935924409215781, "res": {"Yes": 0.9935924409215781, "No": 0.00640723152961918}, "ground_truth": 1}, {"key": "14412752", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999669067235946, "res": {"Yes": 0.9999669067235946, "No": 3.300491725074467e-05}, "ground_truth": 0}, {"key": "14412752", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999756083404814, "res": {"Yes": 0.9999756083404814, "No": 2.4339475975296115e-05}, "ground_truth": 0}, {"key": "37271183", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9995728789121937, "res": {"Yes": 0.9995728789121937, "No": 0.000426997670608476}, "ground_truth": 0}, {"key": "37271183", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978597655748069, "res": {"Yes": 0.9978597655748069, "No": 0.002140174309988199}, "ground_truth": 0}, {"key": "37271183", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998448646647308, "res": {"Yes": 0.9998448646647308, "No": 0.00015499929336358997}, "ground_truth": 1}, {"key": "37271183", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998280596834308, "res": {"Yes": 0.9998280596834308, "No": 0.00017189134429235192}, "ground_truth": 0}, {"key": "37271183", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999210158834096, "res": {"Yes": 0.9999210158834096, "No": 7.884465766416781e-05}, "ground_truth": 0}, {"key": "35588153", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9866509889856023, "res": {"Yes": 0.9866509889856023, "No": 0.013348943978483277}, "ground_truth": 0}, {"key": "35588153", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999058783297223, "res": {"Yes": 0.9999058783297223, "No": 9.409976582048504e-05}, "ground_truth": 0}, {"key": "35588153", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996560143266428, "res": {"Yes": 0.9996560143266428, "No": 0.00034386858291205597}, "ground_truth": 1}, {"key": "35588153", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.3007572019813173e-06}, "ground_truth": 0}, {"key": "35588153", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999895549275502, "res": {"Yes": 0.9999895549275502, "No": 1.0414650928912597e-05}, "ground_truth": 0}, {"key": "39876692", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9854686274905848, "res": {"Yes": 0.9854686274905848, "No": 0.01453113776388488}, "ground_truth": 0}, {"key": "39876692", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999829988145218, "res": {"Yes": 0.9999829988145218, "No": 1.6926738949711053e-05}, "ground_truth": 0}, {"key": "39876692", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999518876724351, "res": {"Yes": 0.9999518876724351, "No": 4.7990763147564625e-05}, "ground_truth": 1}, {"key": "39876692", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999966549126493, "res": {"Yes": 0.999966549126493, "No": 3.336977526051388e-05}, "ground_truth": 0}, {"key": "39876692", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977641023513353, "res": {"Yes": 0.9977641023513353, "No": 0.0022356722793878655}, "ground_truth": 0}, {"key": "38992323", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3258312075354128, "res": {"No": 0.6741683647042538, "Yes": 0.3258312075354128}, "ground_truth": 0}, {"key": "38992323", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990084118487648, "res": {"Yes": 0.9990084118487648, "No": 0.0009915422487194888}, "ground_truth": 0}, {"key": "38992323", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9221387303384552, "res": {"Yes": 0.9221387303384552, "No": 0.07786092940478162}, "ground_truth": 1}, {"key": "38992323", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992922255905554, "res": {"Yes": 0.9992922255905554, "No": 0.0007076658068020353}, "ground_truth": 0}, {"key": "38992323", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9960206038650578, "res": {"Yes": 0.9960206038650578, "No": 0.0039792496802844276}, "ground_truth": 0}, {"key": "37556002", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9303458269695436, "res": {"Yes": 0.9303458269695436, "No": 0.06965413585316756}, "ground_truth": 0}, {"key": "37556002", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963589341147661, "res": {"Yes": 0.9963589341147661, "No": 0.0036410906976707957}, "ground_truth": 0}, {"key": "37556002", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.9375313682436e-06}, "ground_truth": 1}, {"key": "37556002", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5830559314070295, "res": {"Yes": 0.5830559314070295, "No": 0.4169441078915025}, "ground_truth": 0}, {"key": "37556002", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 5.076093948046583e-06}, "ground_truth": 0}, {"key": "39875801", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.4747164646419775e-08, "res": {"No": 1.0, "Yes": 1.4747164646419775e-08}, "ground_truth": 0}, {"key": "39875801", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.6751497394438416e-06}, "ground_truth": 0}, {"key": "39875801", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999962302846054, "res": {"Yes": 0.9999962302846054, "No": 3.736986574705603e-06}, "ground_truth": 1}, {"key": "39875801", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999348425128413, "res": {"Yes": 0.9999348425128413, "No": 6.512278955764838e-05}, "ground_truth": 0}, {"key": "39875801", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.053912273390554e-07}, "ground_truth": 0}, {"key": "39272285", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0017417327986560735, "res": {"No": 0.9982580336875899, "Yes": 0.0017417327986560735}, "ground_truth": 0}, {"key": "39272285", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991693935946331, "res": {"Yes": 0.9991693935946331, "No": 0.0008305063358346907}, "ground_truth": 0}, {"key": "39272285", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980107757343059, "res": {"Yes": 0.9980107757343059, "No": 0.001989235271063943}, "ground_truth": 1}, {"key": "39272285", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999577283874698, "res": {"Yes": 0.9999577283874698, "No": 4.212972539643213e-05}, "ground_truth": 0}, {"key": "39272285", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998364025435236, "res": {"Yes": 0.9998364025435236, "No": 0.00016348766727215816}, "ground_truth": 0}, {"key": "39629714", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9990582512739377, "res": {"Yes": 0.9990582512739377, "No": 0.0009416540512916558}, "ground_truth": 0}, {"key": "39629714", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999485501335897, "res": {"Yes": 0.9999485501335897, "No": 5.1397861059367934e-05}, "ground_truth": 0}, {"key": "39629714", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8159711500287251, "res": {"Yes": 0.8159711500287251, "No": 0.18402850774083201}, "ground_truth": 1}, {"key": "39629714", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998743001608869, "res": {"Yes": 0.9998743001608869, "No": 0.00012560983535869877}, "ground_truth": 0}, {"key": "39629714", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.11935798790894474, "res": {"No": 0.8806418432847196, "Yes": 0.11935798790894474}, "ground_truth": 0}, {"key": "34043257", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9147309597526717, "res": {"Yes": 0.9147309597526717, "No": 0.08526875389278199}, "ground_truth": 0}, {"key": "34043257", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960237984070635, "res": {"Yes": 0.9960237984070635, "No": 0.003976137532965361}, "ground_truth": 0}, {"key": "34043257", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999943230348141, "res": {"Yes": 0.9999943230348141, "No": 5.5852129346588595e-06}, "ground_truth": 1}, {"key": "34043257", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997236863416524, "res": {"Yes": 0.9997236863416524, "No": 0.0002761531375283798}, "ground_truth": 0}, {"key": "34043257", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9794520336857905, "res": {"Yes": 0.9794520336857905, "No": 0.020547187679174076}, "ground_truth": 0}, {"key": "33995240", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.01676403267171519, "res": {"No": 0.9832359147203804, "Yes": 0.01676403267171519}, "ground_truth": 0}, {"key": "33995240", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999995276659155, "res": {"Yes": 0.999995276659155, "No": 4.629563364670076e-06}, "ground_truth": 0}, {"key": "33995240", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999956342685299, "res": {"Yes": 0.9999956342685299, "No": 4.331721586219284e-06}, "ground_truth": 1}, {"key": "33995240", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999972151525776, "res": {"Yes": 0.999972151525776, "No": 2.7804518903276742e-05}, "ground_truth": 0}, {"key": "33995240", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988409294417802, "res": {"Yes": 0.9988409294417802, "No": 0.0011589967717543176}, "ground_truth": 0}, {"key": "39399948", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997780113531267, "res": {"Yes": 0.9997780113531267, "No": 0.00022186843209457606}, "ground_truth": 0}, {"key": "39399948", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999892767243843, "res": {"Yes": 0.999892767243843, "No": 0.0001070869967621214}, "ground_truth": 0}, {"key": "39399948", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999650532877865, "res": {"Yes": 0.999650532877865, "No": 0.00034935778114726163}, "ground_truth": 1}, {"key": "39399948", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999965953125608, "res": {"Yes": 0.999965953125608, "No": 3.399738323453895e-05}, "ground_truth": 0}, {"key": "39399948", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978164378478964, "res": {"Yes": 0.9978164378478964, "No": 0.002183472673752904}, "ground_truth": 0}, {"key": "33185890", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.99959717538563, "res": {"Yes": 0.99959717538563, "No": 0.0004027331395895464}, "ground_truth": 0}, {"key": "33185890", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997136760385316, "res": {"Yes": 0.9997136760385316, "No": 0.00028628692951813956}, "ground_truth": 0}, {"key": "33185890", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996496987498154, "res": {"Yes": 0.9996496987498154, "No": 0.000350164679948259}, "ground_truth": 1}, {"key": "33185890", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999071894471658, "res": {"Yes": 0.9999071894471658, "No": 9.273649254687704e-05}, "ground_truth": 0}, {"key": "33185890", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997715758127782, "res": {"Yes": 0.9997715758127782, "No": 0.00022837580919482997}, "ground_truth": 0}, {"key": "35280425", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999896741293122, "res": {"Yes": 0.9999896741293122, "No": 1.0240839083109899e-05}, "ground_truth": 0}, {"key": "35280425", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999715555225518, "res": {"Yes": 0.9999715555225518, "No": 2.836166921379927e-05}, "ground_truth": 0}, {"key": "35280425", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999636883392843, "res": {"Yes": 0.9999636883392843, "No": 3.619397099871004e-05}, "ground_truth": 1}, {"key": "35280425", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 3.097305555718382e-06}, "ground_truth": 0}, {"key": "35280425", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999336036771202, "res": {"Yes": 0.999336036771202, "No": 0.0006639478172865122}, "ground_truth": 0}, {"key": "16365170", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.0177604315465319e-06}, "ground_truth": 0}, {"key": "16365170", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 2.0721973333471334e-06}, "ground_truth": 0}, {"key": "16365170", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999547484278832, "res": {"Yes": 0.9999547484278832, "No": 4.51411188798568e-05}, "ground_truth": 1}, {"key": "16365170", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.530456815222999e-07}, "ground_truth": 0}, {"key": "16365170", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999757275415809, "res": {"Yes": 0.9999757275415809, "No": 2.4217888588546476e-05}, "ground_truth": 0}, {"key": "24388238", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9995407192017304, "res": {"Yes": 0.9995407192017304, "No": 0.00045918746996168947}, "ground_truth": 0}, {"key": "24388238", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999984906043415, "res": {"Yes": 0.999984906043415, "No": 1.5016837295682916e-05}, "ground_truth": 0}, {"key": "24388238", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999652379302147, "res": {"Yes": 0.9999652379302147, "No": 3.4705374440259446e-05}, "ground_truth": 1}, {"key": "24388238", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.106179321429732e-06}, "ground_truth": 0}, {"key": "24388238", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999925349918634, "res": {"Yes": 0.9999925349918634, "No": 7.398117366901565e-06}, "ground_truth": 0}, {"key": "35024827", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.4743592151167589e-05, "res": {"No": 0.9999850252451228, "Yes": 1.4743592151167589e-05}, "ground_truth": 0}, {"key": "35024827", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8061654375755918, "res": {"Yes": 0.8061654375755918, "No": 0.19383430166729318}, "ground_truth": 0}, {"key": "35024827", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996323158923753, "res": {"Yes": 0.9996323158923753, "No": 0.0003675794765395995}, "ground_truth": 1}, {"key": "35024827", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9876943198208515, "res": {"Yes": 0.9876943198208515, "No": 0.01230564720020649}, "ground_truth": 0}, {"key": "35024827", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.00014076749503764955, "res": {"No": 0.999858924942022, "Yes": 0.00014076749503764955}, "ground_truth": 0}, {"key": "38624944", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.002425947765046988, "res": {"No": 0.9975740185609266, "Yes": 0.002425947765046988}, "ground_truth": 0}, {"key": "38624944", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.4400392902648537, "res": {"No": 0.5599598859784436, "Yes": 0.4400392902648537}, "ground_truth": 0}, {"key": "38624944", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996145649291892, "res": {"Yes": 0.9996145649291892, "No": 0.00038530293073092795}, "ground_truth": 1}, {"key": "38624944", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958553020677557, "res": {"Yes": 0.9958553020677557, "No": 0.004144645577840238}, "ground_truth": 0}, {"key": "38624944", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9909781830304347, "res": {"Yes": 0.9909781830304347, "No": 0.009021710759279858}, "ground_truth": 0}, {"key": "34719830", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6442789593337933, "res": {"Yes": 0.6442789593337933, "No": 0.35572048891993824}, "ground_truth": 0}, {"key": "34719830", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999447239328501, "res": {"Yes": 0.999447239328501, "No": 0.0005526920959629363}, "ground_truth": 0}, {"key": "34719830", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998720581454676, "res": {"Yes": 0.998720581454676, "No": 0.0012793655346414084}, "ground_truth": 1}, {"key": "34719830", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9656962743455338, "res": {"Yes": 0.9656962743455338, "No": 0.03430341387087094}, "ground_truth": 0}, {"key": "34719830", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998198360914665, "res": {"Yes": 0.9998198360914665, "No": 0.0001800978938837185}, "ground_truth": 0}, {"key": "38995225", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7334935609245034, "res": {"Yes": 0.7334935609245034, "No": 0.2665053394812565}, "ground_truth": 0}, {"key": "38995225", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998231731957026, "res": {"Yes": 0.9998231731957026, "No": 0.0001767776199087001}, "ground_truth": 0}, {"key": "38995225", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999315050358871, "res": {"Yes": 0.9999315050358871, "No": 6.839321049715784e-05}, "ground_truth": 1}, {"key": "38995225", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999648803297101, "res": {"Yes": 0.9999648803297101, "No": 3.503470206859391e-05}, "ground_truth": 0}, {"key": "38995225", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970796851393822, "res": {"Yes": 0.9970796851393822, "No": 0.002920220067382015}, "ground_truth": 0}, {"key": "34242311", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.09155228949835867, "res": {"No": 0.9084472496115469, "Yes": 0.09155228949835867}, "ground_truth": 0}, {"key": "34242311", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.088435483479878e-07}, "ground_truth": 0}, {"key": "34242311", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.4689366650397908e-06}, "ground_truth": 1}, {"key": "34242311", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.8207951597684755e-06}, "ground_truth": 0}, {"key": "34242311", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.464707528990081e-07}, "ground_truth": 0}, {"key": "39253748", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.01243310009862689, "res": {"No": 0.9875666419153541, "Yes": 0.01243310009862689}, "ground_truth": 0}, {"key": "39253748", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.08670027702390465, "res": {"No": 0.9132994020485331, "Yes": 0.08670027702390465}, "ground_truth": 0}, {"key": "39253748", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9412550506368372, "res": {"Yes": 0.9412550506368372, "No": 0.058744902246204336}, "ground_truth": 1}, {"key": "39253748", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9907560316167704, "res": {"Yes": 0.9907560316167704, "No": 0.009243912982662672}, "ground_truth": 0}, {"key": "39253748", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9352677565948784, "res": {"Yes": 0.9352677565948784, "No": 0.06473145685726626}, "ground_truth": 0}, {"key": "37131104", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.2833568229085653e-07, "res": {"No": 0.9999994487765019, "Yes": 2.2833568229085653e-07}, "ground_truth": 0}, {"key": "37131104", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996323158923753, "res": {"Yes": 0.9996323158923753, "No": 0.00036765667633536765}, "ground_truth": 0}, {"key": "37131104", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999717939237989, "res": {"Yes": 0.9999717939237989, "No": 2.8076517700835024e-05}, "ground_truth": 1}, {"key": "37131104", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981221531647709, "res": {"Yes": 0.9981221531647709, "No": 0.0018777406627200968}, "ground_truth": 0}, {"key": "37131104", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999766811478886, "res": {"Yes": 0.9999766811478886, "No": 2.3262800925551137e-05}, "ground_truth": 0}, {"key": "38490554", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5745184886803131, "res": {"Yes": 0.5745184886803131, "No": 0.42548097673244445}, "ground_truth": 0}, {"key": "38490554", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994099681872711, "res": {"Yes": 0.9994099681872711, "No": 0.000589958713872657}, "ground_truth": 0}, {"key": "38490554", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999955150656573, "res": {"Yes": 0.9999955150656573, "No": 4.128047595226914e-06}, "ground_truth": 1}, {"key": "38490554", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998634540532465, "res": {"Yes": 0.9998634540532465, "No": 0.00013651268735540232}, "ground_truth": 0}, {"key": "38490554", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999908661547138, "res": {"Yes": 0.9999908661547138, "No": 9.096625840119023e-06}, "ground_truth": 0}, {"key": "29009500", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.09190222856714438, "res": {"No": 0.9080976382950094, "Yes": 0.09190222856714438}, "ground_truth": 0}, {"key": "29009500", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972961500048955, "res": {"Yes": 0.9972961500048955, "No": 0.002703729940133945}, "ground_truth": 0}, {"key": "29009500", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998950235688474, "res": {"Yes": 0.998950235688474, "No": 0.0010497324046578338}, "ground_truth": 1}, {"key": "29009500", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9972464682420092, "res": {"Yes": 0.9972464682420092, "No": 0.002753459934833684}, "ground_truth": 0}, {"key": "29009500", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997245205514333, "res": {"Yes": 0.9997245205514333, "No": 0.0002754514968119133}, "ground_truth": 0}, {"key": "36703057", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4323550081879593, "res": {"No": 0.5676439360383752, "Yes": 0.4323550081879593}, "ground_truth": 0}, {"key": "36703057", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.2184249940583986, "res": {"No": 0.7815746517580318, "Yes": 0.2184249940583986}, "ground_truth": 0}, {"key": "36703057", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37627463122950044, "res": {"No": 0.6237249207120583, "Yes": 0.37627463122950044}, "ground_truth": 1}, {"key": "36703057", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5205236525399551, "res": {"Yes": 0.5205236525399551, "No": 0.4794758264531838}, "ground_truth": 0}, {"key": "36703057", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.07433805336665, "res": {"No": 0.9256607982765195, "Yes": 0.07433805336665}, "ground_truth": 0}, {"key": "34876987", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0924342867907198, "res": {"No": 0.9075651452682699, "Yes": 0.0924342867907198}, "ground_truth": 0}, {"key": "34876987", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9929865968117625, "res": {"Yes": 0.9929865968117625, "No": 0.007013096144873145}, "ground_truth": 0}, {"key": "34876987", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997842085863171, "res": {"Yes": 0.9997842085863171, "No": 0.00021571574766038708}, "ground_truth": 1}, {"key": "34876987", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998808482291361, "res": {"Yes": 0.9998808482291361, "No": 0.00011904654581810008}, "ground_truth": 0}, {"key": "34876987", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9714641233274138, "res": {"Yes": 0.9714641233274138, "No": 0.028535575699245884}, "ground_truth": 0}, {"key": "36209258", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9849628847271756, "res": {"Yes": 0.9849628847271756, "No": 0.015037045839491534}, "ground_truth": 0}, {"key": "36209258", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999902701413353, "res": {"Yes": 0.9999902701413353, "No": 9.698949279724893e-06}, "ground_truth": 0}, {"key": "36209258", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997843277705998, "res": {"Yes": 0.9997843277705998, "No": 0.00021554216635936795}, "ground_truth": 1}, {"key": "36209258", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999707211216348, "res": {"Yes": 0.9999707211216348, "No": 2.924183992882445e-05}, "ground_truth": 0}, {"key": "36209258", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999399679479958, "res": {"Yes": 0.9999399679479958, "No": 5.994932115946394e-05}, "ground_truth": 0}, {"key": "36854437", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.98411916976251e-05, "res": {"No": 0.9999200623291858, "Yes": 7.98411916976251e-05}, "ground_truth": 0}, {"key": "36854437", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999797803764193, "res": {"Yes": 0.9999797803764193, "No": 2.0104236378652498e-05}, "ground_truth": 0}, {"key": "36854437", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999944422379444, "res": {"Yes": 0.9999944422379444, "No": 5.5346457259716765e-06}, "ground_truth": 1}, {"key": "36854437", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999516492769339, "res": {"Yes": 0.9999516492769339, "No": 4.823112492401483e-05}, "ground_truth": 0}, {"key": "36854437", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997224946317397, "res": {"Yes": 0.9997224946317397, "No": 0.0002774768643403189}, "ground_truth": 0}, {"key": "38047723", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00013019112433285116, "res": {"No": 0.9998694134371675, "Yes": 0.00013019112433285116}, "ground_truth": 0}, {"key": "38047723", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9930271499215562, "res": {"Yes": 0.9930271499215562, "No": 0.006972754071809653}, "ground_truth": 0}, {"key": "38047723", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984574068218557, "res": {"Yes": 0.9984574068218557, "No": 0.001542473472598055}, "ground_truth": 1}, {"key": "38047723", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963777641812855, "res": {"Yes": 0.9963777641812855, "No": 0.003621891050143397}, "ground_truth": 0}, {"key": "38047723", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9404002654902133, "res": {"Yes": 0.9404002654902133, "No": 0.05959966653361855}, "ground_truth": 0}, {"key": "34287816", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8514982194126658, "res": {"Yes": 0.8514982194126658, "No": 0.14850144353681444}, "ground_truth": 0}, {"key": "34287816", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9890929766596014, "res": {"Yes": 0.9890929766596014, "No": 0.010906939946610867}, "ground_truth": 0}, {"key": "34287816", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999843100330889, "res": {"Yes": 0.9999843100330889, "No": 1.565099601300074e-05}, "ground_truth": 1}, {"key": "34287816", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998268678590992, "res": {"Yes": 0.9998268678590992, "No": 0.00017301591041128076}, "ground_truth": 0}, {"key": "34287816", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976766471719416, "res": {"Yes": 0.9976766471719416, "No": 0.0023233132200398505}, "ground_truth": 0}, {"key": "33235855", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5680271852787819, "res": {"Yes": 0.5680271852787819, "No": 0.43197243527827917}, "ground_truth": 0}, {"key": "33235855", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999707211216348, "res": {"Yes": 0.9999707211216348, "No": 2.923625527074984e-05}, "ground_truth": 0}, {"key": "33235855", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999475965531086, "res": {"Yes": 0.9999475965531086, "No": 5.2269998183065755e-05}, "ground_truth": 1}, {"key": "33235855", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.677358188962691e-06}, "ground_truth": 0}, {"key": "33235855", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9142378149139089, "res": {"Yes": 0.9142378149139089, "No": 0.08576188516785065}, "ground_truth": 0}, {"key": "34381016", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 9.366293668124201e-05, "res": {"No": 0.9999062359061275, "Yes": 9.366293668124201e-05}, "ground_truth": 0}, {"key": "34381016", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.07861667908180978, "res": {"No": 0.92138323664346, "Yes": 0.07861667908180978}, "ground_truth": 0}, {"key": "34381016", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999956342685299, "res": {"Yes": 0.9999956342685299, "No": 4.342310405513263e-06}, "ground_truth": 1}, {"key": "34381016", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996893731045271, "res": {"Yes": 0.9996893731045271, "No": 0.0003105275290917248}, "ground_truth": 0}, {"key": "34381016", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0003634343073069144, "res": {"No": 0.9996364755210411, "Yes": 0.0003634343073069144}, "ground_truth": 0}, {"key": "28064995", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.0327154473214603e-06, "res": {"No": 0.9999978991308068, "Yes": 2.0327154473214603e-06}, "ground_truth": 0}, {"key": "28064995", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999669067235946, "res": {"Yes": 0.9999669067235946, "No": 3.3038169942336385e-05}, "ground_truth": 0}, {"key": "28064995", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998619046160455, "res": {"Yes": 0.9998619046160455, "No": 0.0001380130930684539}, "ground_truth": 1}, {"key": "28064995", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998596400713766, "res": {"Yes": 0.9998596400713766, "No": 0.00014033979063254217}, "ground_truth": 0}, {"key": "28064995", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997720525239746, "res": {"Yes": 0.9997720525239746, "No": 0.00022788441712108081}, "ground_truth": 0}, {"key": "37576197", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.4845697407433495e-06, "res": {"No": 0.9999915813694369, "Yes": 4.4845697407433495e-06}, "ground_truth": 0}, {"key": "37576197", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9957328343344324, "res": {"Yes": 0.9957328343344324, "No": 0.004266967036597688}, "ground_truth": 0}, {"key": "37576197", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999174400582596, "res": {"Yes": 0.9999174400582596, "No": 8.236878628790774e-05}, "ground_truth": 1}, {"key": "37576197", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999774078524101, "res": {"Yes": 0.999774078524101, "No": 0.0002258356580461995}, "ground_truth": 0}, {"key": "37576197", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7988776131337386, "res": {"Yes": 0.7988776131337386, "No": 0.2011206102272027}, "ground_truth": 0}, {"key": "34454741", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9953077477739232, "res": {"Yes": 0.9953077477739232, "No": 0.004692267677746935}, "ground_truth": 0}, {"key": "34454741", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998872844828712, "res": {"Yes": 0.9998872844828712, "No": 0.00011265494443934777}, "ground_truth": 0}, {"key": "34454741", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999788018631226, "res": {"Yes": 0.999788018631226, "No": 0.00021188179702291108}, "ground_truth": 1}, {"key": "34454741", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977105840195809, "res": {"Yes": 0.9977105840195809, "No": 0.002289404589060878}, "ground_truth": 0}, {"key": "34454741", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996074155014689, "res": {"Yes": 0.9996074155014689, "No": 0.0003925369048760455}, "ground_truth": 0}, {"key": "34766970", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9941729816116924, "res": {"Yes": 0.9941729816116924, "No": 0.0058270057735179915}, "ground_truth": 0}, {"key": "34766970", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999729859278866, "res": {"Yes": 0.9999729859278866, "No": 2.696936407681409e-05}, "ground_truth": 0}, {"key": "34766970", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999979303571174, "res": {"Yes": 0.999979303571174, "No": 2.056905080483965e-05}, "ground_truth": 1}, {"key": "34766970", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999820452021894, "res": {"Yes": 0.9999820452021894, "No": 1.7885271590317224e-05}, "ground_truth": 0}, {"key": "34766970", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999411599161456, "res": {"Yes": 0.9999411599161456, "No": 5.871208353088485e-05}, "ground_truth": 0}, {"key": "35574186", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.060093956494082904, "res": {"No": 0.9399057863767043, "Yes": 0.060093956494082904}, "ground_truth": 0}, {"key": "35574186", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9795553125529884, "res": {"Yes": 0.9795553125529884, "No": 0.0204444996181327}, "ground_truth": 0}, {"key": "35574186", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.990172230284679, "res": {"Yes": 0.990172230284679, "No": 0.009827206168710013}, "ground_truth": 1}, {"key": "35574186", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997806332644017, "res": {"Yes": 0.9997806332644017, "No": 0.00021899317347076066}, "ground_truth": 0}, {"key": "35574186", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.002777457050323311, "res": {"No": 0.9972222874025404, "Yes": 0.002777457050323311}, "ground_truth": 0}, {"key": "35486470", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.9047896031990282e-06, "res": {"No": 0.9999976607241361, "Yes": 1.9047896031990282e-06}, "ground_truth": 0}, {"key": "35486470", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999928763541437, "res": {"Yes": 0.999928763541437, "No": 7.119706231630525e-05}, "ground_truth": 0}, {"key": "35486470", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9912143721743286, "res": {"Yes": 0.9912143721743286, "No": 0.008785382659592686}, "ground_truth": 1}, {"key": "35486470", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992671173597689, "res": {"Yes": 0.9992671173597689, "No": 0.000732840117944181}, "ground_truth": 0}, {"key": "35486470", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998292515191815, "res": {"Yes": 0.9998292515191815, "No": 0.00017066124323802535}, "ground_truth": 0}, {"key": "40977702", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 9.868367750731475e-05, "res": {"No": 0.9999012298380936, "Yes": 9.868367750731475e-05}, "ground_truth": 0}, {"key": "40977702", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987055994599925, "res": {"Yes": 0.9987055994599925, "No": 0.001294358594955007}, "ground_truth": 0}, {"key": "40977702", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.06282106071659123, "res": {"No": 0.9371783531310077, "Yes": 0.06282106071659123}, "ground_truth": 1}, {"key": "40977702", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.09031174143918147, "res": {"No": 0.9096872717068502, "Yes": 0.09031174143918147}, "ground_truth": 0}, {"key": "40977702", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8747045296949486, "res": {"Yes": 0.8747045296949486, "No": 0.1252950904763237}, "ground_truth": 0}, {"key": "35336618", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.4873777214006763e-07, "res": {"No": 0.9999998063873687, "Yes": 1.4873777214006763e-07}, "ground_truth": 0}, {"key": "35336618", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999592779711644, "res": {"Yes": 0.9999592779711644, "No": 4.059640670809846e-05}, "ground_truth": 0}, {"key": "35336618", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998198360914665, "res": {"Yes": 0.9998198360914665, "No": 0.00018003126998916206}, "ground_truth": 1}, {"key": "35336618", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999502188991202, "res": {"Yes": 0.9999502188991202, "No": 4.972304749790975e-05}, "ground_truth": 0}, {"key": "35336618", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999783499623655, "res": {"Yes": 0.9999783499623655, "No": 2.154990822767826e-05}, "ground_truth": 0}, {"key": "33024679", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999688139202959, "res": {"Yes": 0.9999688139202959, "No": 3.1137801330752456e-05}, "ground_truth": 0}, {"key": "33024679", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.8563187658975233e-07}, "ground_truth": 0}, {"key": "33024679", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999939654258081, "res": {"Yes": 0.9999939654258081, "No": 5.959046934457251e-06}, "ground_truth": 1}, {"key": "33024679", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999853323178097, "res": {"Yes": 0.999853323178097, "No": 0.00014658105588793278}, "ground_truth": 0}, {"key": "33024679", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999608275572601, "res": {"Yes": 0.9999608275572601, "No": 3.910043365944737e-05}, "ground_truth": 0}, {"key": "37451334", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.99942175509806, "res": {"Yes": 0.99942175509806, "No": 0.0005781398867355799}, "ground_truth": 0}, {"key": "37451334", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999506956924973, "res": {"Yes": 0.9999506956924973, "No": 4.921933617440368e-05}, "ground_truth": 0}, {"key": "37451334", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998066106221369, "res": {"Yes": 0.9998066106221369, "No": 0.00019326794805690523}, "ground_truth": 1}, {"key": "37451334", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997985063825662, "res": {"Yes": 0.9997985063825662, "No": 0.00020139910934668343}, "ground_truth": 0}, {"key": "37451334", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999267372238483, "res": {"Yes": 0.9999267372238483, "No": 7.321435863626487e-05}, "ground_truth": 0}, {"key": "33354824", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.133213763051656, "res": {"No": 0.8667859920357074, "Yes": 0.133213763051656}, "ground_truth": 0}, {"key": "33354824", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.1264731559016281e-07}, "ground_truth": 0}, {"key": "33354824", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.383530618180894e-07}, "ground_truth": 1}, {"key": "33354824", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.673533999665105e-06}, "ground_truth": 0}, {"key": "33354824", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997585856551338, "res": {"Yes": 0.9997585856551338, "No": 0.00024137568000637242}, "ground_truth": 0}, {"key": "34688538", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.0781781982613674e-06, "res": {"No": 0.9999926541946805, "Yes": 7.0781781982613674e-06}, "ground_truth": 0}, {"key": "34688538", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.06070598415659973, "res": {"No": 0.9392938025018258, "Yes": 0.06070598415659973}, "ground_truth": 0}, {"key": "34688538", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.08987683912563266, "res": {"No": 0.9101231217471799, "Yes": 0.08987683912563266}, "ground_truth": 1}, {"key": "34688538", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.13559381725228115, "res": {"No": 0.8644060372927099, "Yes": 0.13559381725228115}, "ground_truth": 0}, {"key": "34688538", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.026444872389013496, "res": {"No": 0.9735549762361524, "Yes": 0.026444872389013496}, "ground_truth": 0}, {"key": "33646276", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00046893129161731137, "res": {"No": 0.999530949018822, "Yes": 0.00046893129161731137}, "ground_truth": 0}, {"key": "33646276", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8993728517019323, "res": {"Yes": 0.8993728517019323, "No": 0.10062660430165696}, "ground_truth": 0}, {"key": "33646276", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998378327705268, "res": {"Yes": 0.9998378327705268, "No": 0.00016204378199774532}, "ground_truth": 1}, {"key": "33646276", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988210638918542, "res": {"Yes": 0.9988210638918542, "No": 0.001178847043429023}, "ground_truth": 0}, {"key": "33646276", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998103052243296, "res": {"Yes": 0.9998103052243296, "No": 0.0001895912725608515}, "ground_truth": 0}, {"key": "40322608", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999174400582596, "res": {"Yes": 0.9999174400582596, "No": 8.250810722977064e-05}, "ground_truth": 0}, {"key": "40322608", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972008229224222, "res": {"Yes": 0.9972008229224222, "No": 0.002799114952948702}, "ground_truth": 1}, {"key": "40322608", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.00047883063665360415, "res": {"No": 0.9995210634667341, "Yes": 0.00047883063665360415}, "ground_truth": 0}, {"key": "40322608", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.004499495498208726, "res": {"No": 0.9955005221178725, "Yes": 0.004499495498208726}, "ground_truth": 0}, {"key": "39565762", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999707211216348, "res": {"Yes": 0.9999707211216348, "No": 2.926003401103316e-05}, "ground_truth": 0}, {"key": "39565762", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 4.8609492800512674e-08}, "ground_truth": 0}, {"key": "39565762", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.013641863490719e-07}, "ground_truth": 1}, {"key": "39565762", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.864344585319785e-06}, "ground_truth": 0}, {"key": "39565762", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1875882357601304e-06}, "ground_truth": 0}, {"key": "30534259", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999886013079656, "res": {"Yes": 0.9999886013079656, "No": 1.1352097833166924e-05}, "ground_truth": 0}, {"key": "30534259", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996693569744574, "res": {"Yes": 0.9996693569744574, "No": 0.00033055450350492633}, "ground_truth": 1}, {"key": "30534259", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999942038320978, "res": {"Yes": 0.9999942038320978, "No": 5.7187288859712305e-06}, "ground_truth": 0}, {"key": "30534259", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986783705225002, "res": {"Yes": 0.9986783705225002, "No": 0.0013215996764691356}, "ground_truth": 0}, {"key": "39644242", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8262021927537984, "res": {"Yes": 0.8262021927537984, "No": 0.1737968831249658}, "ground_truth": 0}, {"key": "39644242", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999883629027115, "res": {"Yes": 0.9999883629027115, "No": 1.1511039805467684e-05}, "ground_truth": 0}, {"key": "39644242", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999852636485811, "res": {"Yes": 0.9999852636485811, "No": 1.4625631299502797e-05}, "ground_truth": 1}, {"key": "39644242", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.6770101550208214e-06}, "ground_truth": 0}, {"key": "39644242", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998027968570965, "res": {"Yes": 0.9998027968570965, "No": 0.00019708962173732586}, "ground_truth": 0}, {"key": "19853740", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9976993111509193, "res": {"Yes": 0.9976993111509193, "No": 0.0023006382374455}, "ground_truth": 0}, {"key": "19853740", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998735850310456, "res": {"Yes": 0.9998735850310456, "No": 0.00012634061559235637}, "ground_truth": 0}, {"key": "19853740", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992575917214997, "res": {"Yes": 0.9992575917214997, "No": 0.0007423941176111756}, "ground_truth": 1}, {"key": "19853740", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997806332644017, "res": {"Yes": 0.9997806332644017, "No": 0.0002192298138609183}, "ground_truth": 0}, {"key": "19853740", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.8966867685837517e-06}, "ground_truth": 0}, {"key": "33023078", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0009375397509549429, "res": {"No": 0.9990624195035159, "Yes": 0.0009375397509549429}, "ground_truth": 0}, {"key": "33023078", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 5.0741983809761725e-06}, "ground_truth": 0}, {"key": "33023078", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.898435097749572e-07}, "ground_truth": 1}, {"key": "33023078", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.533748222264059e-06}, "ground_truth": 0}, {"key": "33023078", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999835948245663, "res": {"Yes": 0.9999835948245663, "No": 1.638770826800357e-05}, "ground_truth": 0}, {"key": "38329806", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.982880744597263, "res": {"Yes": 0.982880744597263, "No": 0.0171192280469619}, "ground_truth": 0}, {"key": "38329806", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8582244645623854, "res": {"Yes": 0.8582244645623854, "No": 0.14177547992081926}, "ground_truth": 0}, {"key": "38329806", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2157293642876917, "res": {"No": 0.7842706118598343, "Yes": 0.2157293642876917}, "ground_truth": 1}, {"key": "38329806", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9886871747449408, "res": {"Yes": 0.9886871747449408, "No": 0.011312748537858472}, "ground_truth": 0}, {"key": "38329806", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992941315424348, "res": {"Yes": 0.9992941315424348, "No": 0.0007058446409488493}, "ground_truth": 0}, {"key": "38761942", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9618517521363601, "res": {"Yes": 0.9618517521363601, "No": 0.038148165839843665}, "ground_truth": 0}, {"key": "38761942", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989465479705907, "res": {"Yes": 0.9989465479705907, "No": 0.0010534634702258955}, "ground_truth": 0}, {"key": "38761942", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999911045595646, "res": {"Yes": 0.9999911045595646, "No": 8.810106760369456e-06}, "ground_truth": 1}, {"key": "38761942", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.6732712754515484e-06}, "ground_truth": 0}, {"key": "38761942", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7901338322164804, "res": {"Yes": 0.7901338322164804, "No": 0.20986598891323596}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.2399680380954565e-05, "res": {"No": 0.9999675027220479, "Yes": 3.2399680380954565e-05}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 7.192614559519364e-08}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999938462231346, "res": {"Yes": 0.9999938462231346, "No": 6.1222553995014015e-06}, "ground_truth": 1}, {"key": "33773576", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 3.2503495940746542e-06}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.753774913124733e-06}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9983973951448247, "res": {"Yes": 0.9983973951448247, "No": 0.0016025198217166309}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1427636213578479e-06}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999899125338788, "res": {"Yes": 0.9999899125338788, "No": 1.0008568318410807e-05}, "ground_truth": 1}, {"key": "37642631", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999882437011058, "res": {"Yes": 0.9999882437011058, "No": 1.163891887650434e-05}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999931310055916, "res": {"Yes": 0.9999931310055916, "No": 6.811079046916084e-06}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8417894208675983, "res": {"Yes": 0.8417894208675983, "No": 0.1582103978228936}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.224894418078275e-07}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.9746845240942354e-07}, "ground_truth": 1}, {"key": "36609836", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 5.273903254743535e-08}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 7.727077695382715e-08}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9469636290763249, "res": {"Yes": 0.9469636290763249, "No": 0.05303619362045193}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9911219695304996, "res": {"Yes": 0.9911219695304996, "No": 0.008877910976925403}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9523478683550649, "res": {"Yes": 0.9523478683550649, "No": 0.04765197031495866}, "ground_truth": 1}, {"key": "41035610", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997226138086653, "res": {"Yes": 0.9997226138086653, "No": 0.00027734897698615817}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9950345562769811, "res": {"Yes": 0.9950345562769811, "No": 0.004965412020167815}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9995949114105473, "res": {"Yes": 0.9995949114105473, "No": 0.0004049706348620158}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999979422771714, "res": {"Yes": 0.999979422771714, "No": 2.036208428417894e-05}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999801379802525, "res": {"Yes": 0.9999801379802525, "No": 1.976311967137435e-05}, "ground_truth": 1}, {"key": "37592684", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 9.542340940343349e-09}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 7.166558533223613e-06}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999871708812939, "res": {"Yes": 0.9999871708812939, "No": 1.2789666727299518e-05}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992647350398522, "res": {"Yes": 0.9992647350398522, "No": 0.0007351791110024017}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999124339341318, "res": {"Yes": 0.9999124339341318, "No": 8.745422887814154e-05}, "ground_truth": 1}, {"key": "38951040", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 7.934596943865043e-06}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.3234325173497235e-07}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8146875365422516, "res": {"Yes": 0.8146875365422516, "No": 0.18531225720188557}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8966378976269402, "res": {"Yes": 0.8966378976269402, "No": 0.10336189553103026}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9342073343527704, "res": {"Yes": 0.9342073343527704, "No": 0.0657923834756774}, "ground_truth": 1}, {"key": "40774469", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5172431528742116, "res": {"Yes": 0.5172431528742116, "No": 0.48275670914850916}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9633236003863324, "res": {"Yes": 0.9633236003863324, "No": 0.03667635534794789}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.7021147779319867e-05, "res": {"No": 0.9999828796125555, "Yes": 1.7021147779319867e-05}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9534201009456554, "res": {"Yes": 0.9534201009456554, "No": 0.04657972119745276}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8862945361880902, "res": {"Yes": 0.8862945361880902, "No": 0.1137052164739164}, "ground_truth": 1}, {"key": "40876288", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987887123018901, "res": {"Yes": 0.9987887123018901, "No": 0.0012112739766395049}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957833063237324, "res": {"Yes": 0.9957833063237324, "No": 0.00421664833465001}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9632882046742537, "res": {"Yes": 0.9632882046742537, "No": 0.03671168067635992}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988735135627427, "res": {"Yes": 0.9988735135627427, "No": 0.0011262407073965807}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9732413093947239, "res": {"Yes": 0.9732413093947239, "No": 0.026758483920951752}, "ground_truth": 1}, {"key": "40340131", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998156683911591, "res": {"Yes": 0.9998156683911591, "No": 0.00018421205336349955}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6288577691683823, "res": {"Yes": 0.6288577691683823, "No": 0.37114182937804313}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9531100753101017, "res": {"Yes": 0.9531100753101017, "No": 0.04688969736731991}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.3132104343896804, "res": {"No": 0.686789457672628, "Yes": 0.3132104343896804}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.4887154752024524e-07}, "ground_truth": 1}, {"key": "30121591", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999157713474321, "res": {"Yes": 0.9999157713474321, "No": 8.41405452122982e-05}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988389051928037, "res": {"Yes": 0.9988389051928037, "No": 0.0011610389826997131}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0530057436789933, "res": {"No": 0.9469939958459015, "Yes": 0.0530057436789933}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9855871835408917, "res": {"Yes": 0.9855871835408917, "No": 0.014412693033482245}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9245317591127521, "res": {"Yes": 0.9245317591127521, "No": 0.07546783801813177}, "ground_truth": 1}, {"key": "35623366", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8459162919135751, "res": {"Yes": 0.8459162919135751, "No": 0.15408336374676884}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.23101586295897558, "res": {"No": 0.7689837200397514, "Yes": 0.23101586295897558}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.997579239777246, "res": {"Yes": 0.997579239777246, "No": 0.0024207559779832587}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 4.1135715892269534e-07}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.5723866918802788e-07}, "ground_truth": 1}, {"key": "41014093", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.745225979854787e-07}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 2.9150551644189596e-08}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.6673322275088225e-07, "res": {"No": 0.9999993295729247, "Yes": 4.6673322275088225e-07}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972196759799556, "res": {"Yes": 0.9972196759799556, "No": 0.0027802901116937635}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997361956824798, "res": {"Yes": 0.9997361956824798, "No": 0.0002637789806417103}, "ground_truth": 1}, {"key": "11387984", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997927857718344, "res": {"Yes": 0.9997927857718344, "No": 0.00020717348995440607}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976332198512856, "res": {"Yes": 0.9976332198512856, "No": 0.002366748042592703}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.774773150092974e-05, "res": {"No": 0.9999521260689932, "Yes": 4.774773150092974e-05}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999877668918251, "res": {"Yes": 0.9999877668918251, "No": 1.2180356826639852e-05}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995292809529897, "res": {"Yes": 0.9995292809529897, "No": 0.000470600744334967}, "ground_truth": 1}, {"key": "39508312", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999797803764193, "res": {"Yes": 0.9999797803764193, "No": 2.0080085604048634e-05}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999334121670682, "res": {"Yes": 0.9999334121670682, "No": 6.647776680259717e-05}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0294223761672091, "res": {"No": 0.9705774182937911, "Yes": 0.0294223761672091}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996679270084451, "res": {"Yes": 0.9996679270084451, "No": 0.0003320227723448229}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.4734785575027915e-06}, "ground_truth": 1}, {"key": "35815369", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995652531498106, "res": {"Yes": 0.9995652531498106, "No": 0.00043472743911672284}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.998280364969578, "res": {"Yes": 0.998280364969578, "No": 0.0017196549822381347}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3824926429569505, "res": {"No": 0.6175070213113207, "Yes": 0.3824926429569505}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994411669653781, "res": {"Yes": 0.9994411669653781, "No": 0.0005587411226709131}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977826202267016, "res": {"Yes": 0.9977826202267016, "No": 0.002217342216697045}, "ground_truth": 1}, {"key": "35802823", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996243322408098, "res": {"Yes": 0.9996243322408098, "No": 0.00037551332551484995}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999592779711644, "res": {"Yes": 0.9999592779711644, "No": 4.0591778981519167e-05}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9821516242828318, "res": {"Yes": 0.9821516242828318, "No": 0.017848198503916682}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7880282338557094, "res": {"Yes": 0.7880282338557094, "No": 0.2119711708741093}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.8351193591186875e-06}, "ground_truth": 1}, {"key": "38499968", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998234115535755, "res": {"Yes": 0.9998234115535755, "No": 0.00017654972380360897}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981133596475252, "res": {"Yes": 0.9981133596475252, "No": 0.0018865987015452077}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997259469394023, "res": {"Yes": 0.9997259469394023, "No": 0.00027396724568978174}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999803763825457, "res": {"Yes": 0.9999803763825457, "No": 1.9493801786975908e-05}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9976583728961608, "res": {"Yes": 0.9976583728961608, "No": 0.0023416347088260152}, "ground_truth": 1}, {"key": "36926726", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.7323373360778898e-06}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980319082374088, "res": {"Yes": 0.9980319082374088, "No": 0.0019680456194616203}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0005064347843594186, "res": {"No": 0.9994934438129291, "Yes": 0.0005064347843594186}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997507311526429, "res": {"Yes": 0.9997507311526429, "No": 0.00024914138783981195}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 4.509537317803059e-06}, "ground_truth": 1}, {"key": "40903712", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.767269289381582e-07}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999211350800014, "res": {"Yes": 0.9999211350800014, "No": 7.879531635391143e-05}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.01433902410745305, "res": {"No": 0.9856607283363898, "Yes": 0.01433902410745305}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992717483040796, "res": {"Yes": 0.9992717483040796, "No": 0.000728225867742183}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999368655381937, "res": {"Yes": 0.999368655381937, "No": 0.0006312675978380718}, "ground_truth": 1}, {"key": "19614862", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980040056513704, "res": {"Yes": 0.9980040056513704, "No": 0.0019959263511401476}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999406831293152, "res": {"Yes": 0.9999406831293152, "No": 5.917845607265882e-05}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 6.066002030205468e-08, "res": {"No": 0.9999996871837189, "Yes": 6.066002030205468e-08}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9728708688921731, "res": {"Yes": 0.9728708688921731, "No": 0.027129027469637065}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999484309397297, "res": {"Yes": 0.9999484309397297, "No": 5.152687715114209e-05}, "ground_truth": 1}, {"key": "38861704", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989955617861087, "res": {"Yes": 0.9989955617861087, "No": 0.0010043662835417626}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999783499623655, "res": {"Yes": 0.9999783499623655, "No": 2.155747357310663e-05}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.011118784541007894, "res": {"No": 0.9888810046062431, "Yes": 0.011118784541007894}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999926541946805, "res": {"Yes": 0.9999926541946805, "No": 7.263105977677358e-06}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999824028078323, "res": {"Yes": 0.9999824028078323, "No": 1.7543167501665852e-05}, "ground_truth": 1}, {"key": "34349607", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999757275415809, "res": {"Yes": 0.9999757275415809, "No": 2.4243767110229774e-05}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999480733442354, "res": {"Yes": 0.9999480733442354, "No": 5.184209986943432e-05}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9820005554465302, "res": {"Yes": 0.9820005554465302, "No": 0.017999355991068538}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998085175151116, "res": {"Yes": 0.9998085175151116, "No": 0.00019139523016958816}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999943230348141, "res": {"Yes": 0.9999943230348141, "No": 5.587061946134744e-06}, "ground_truth": 1}, {"key": "20773800", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999934961715081, "res": {"Yes": 0.999934961715081, "No": 6.496544791115439e-05}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999679795226665, "res": {"Yes": 0.9999679795226665, "No": 3.198703159378133e-05}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9610787024069272, "res": {"Yes": 0.9610787024069272, "No": 0.03892123462675794}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999903893441826, "res": {"Yes": 0.9999903893441826, "No": 9.575201296360091e-06}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999820452021894, "res": {"Yes": 0.9999820452021894, "No": 1.7879477700982703e-05}, "ground_truth": 1}, {"key": "35545608", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 6.235810470690111e-06}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.7505353421565827e-07}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9980416452838102, "res": {"Yes": 0.9980416452838102, "No": 0.0019582689201485715}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999938462231346, "res": {"Yes": 0.9999938462231346, "No": 6.1190328804361415e-06}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999570131939592, "res": {"Yes": 0.9999570131939592, "No": 4.286894015328377e-05}, "ground_truth": 1}, {"key": "37258984", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.5860054475254227e-06}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999651187283657, "res": {"Yes": 0.9999651187283657, "No": 3.4719641280789285e-05}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9982093306648331, "res": {"Yes": 0.9982093306648331, "No": 0.0017905830247702745}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.8758392881618242e-06}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998971773206141, "res": {"Yes": 0.9998971773206141, "No": 0.00010269665624819138}, "ground_truth": 1}, {"key": "37274562", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998963429787603, "res": {"Yes": 0.9998963429787603, "No": 0.00010352535493945034}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.7936801278817717e-06}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9975427061017382, "res": {"Yes": 0.9975427061017382, "No": 0.00245721176675916}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9254674756959299, "res": {"Yes": 0.9254674756959299, "No": 0.07453177756829597}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990975228776562, "res": {"Yes": 0.9990975228776562, "No": 0.0009023879776038468}, "ground_truth": 1}, {"key": "40828068", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989063406817488, "res": {"Yes": 0.9989063406817488, "No": 0.0010935642098513021}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987151161709921, "res": {"Yes": 0.9987151161709921, "No": 0.0012848661878835912}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9861595480813106, "res": {"Yes": 0.9861595480813106, "No": 0.013840329794419053}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.937552915059752, "res": {"Yes": 0.937552915059752, "No": 0.06244659921613896}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5527733592910119, "res": {"Yes": 0.5527733592910119, "No": 0.44722623224291264}, "ground_truth": 1}, {"key": "37807180", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38033410076620866, "res": {"No": 0.6196648571985233, "Yes": 0.38033410076620866}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0019670034217474236, "res": {"No": 0.9980325030646034, "Yes": 0.0019670034217474236}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9987916886966877, "res": {"Yes": 0.9987916886966877, "No": 0.0012082406327629303}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9922635039386298, "res": {"Yes": 0.9922635039386298, "No": 0.007736448088526431}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999819260003368, "res": {"Yes": 0.9999819260003368, "No": 1.8033110817437893e-05}, "ground_truth": 1}, {"key": "40748607", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997926665865435, "res": {"Yes": 0.9997926665865435, "No": 0.0002072683971448928}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993424658703745, "res": {"Yes": 0.9993424658703745, "No": 0.0006575123571134917}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.025237986970921653, "res": {"No": 0.9747618404765784, "Yes": 0.025237986970921653}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987940662232532, "res": {"Yes": 0.9987940662232532, "No": 0.0012058632240532995}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9864495788265408, "res": {"Yes": 0.9864495788265408, "No": 0.013550284981941399}, "ground_truth": 1}, {"key": "40123819", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9327557391172874, "res": {"Yes": 0.9327557391172874, "No": 0.06724408632866415}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999403255405914, "res": {"Yes": 0.9999403255405914, "No": 5.959594637787834e-05}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00022770195478888673, "res": {"No": 0.9997721716868126, "Yes": 0.00022770195478888673}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979261246573183, "res": {"Yes": 0.9979261246573183, "No": 0.002073902875142775}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972431543975012, "res": {"Yes": 0.9972431543975012, "No": 0.002756771583577526}, "ground_truth": 1}, {"key": "38453867", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.0859124029204404e-06}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999956342685299, "res": {"Yes": 0.9999956342685299, "No": 4.250159696869072e-06}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.17311120284057752, "res": {"No": 0.8268886800265298, "Yes": 0.17311120284057752}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9838102754717006, "res": {"Yes": 0.9838102754717006, "No": 0.016189674767093848}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.08496429241409234, "res": {"No": 0.9150355847734896, "Yes": 0.08496429241409234}, "ground_truth": 1}, {"key": "38944856", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9054382303988586, "res": {"Yes": 0.9054382303988586, "No": 0.09456168557769339}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 5.44263035402767e-05, "res": {"No": 0.9999448550185404, "Yes": 5.44263035402767e-05}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9936925897531571, "res": {"Yes": 0.9936925897531571, "No": 0.0063072847756819785}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9887748227817308, "res": {"Yes": 0.9887748227817308, "No": 0.011224981426611986}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9060340960844409, "res": {"Yes": 0.9060340960844409, "No": 0.09396529190024268}, "ground_truth": 1}, {"key": "35778898", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9018304750379477, "res": {"Yes": 0.9018304750379477, "No": 0.0981690781171211}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962442841094431, "res": {"Yes": 0.9962442841094431, "No": 0.003755638380850322}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9950936974158108, "res": {"Yes": 0.9950936974158108, "No": 0.004906261659390984}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 4.194912820605094e-06}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999570131939592, "res": {"Yes": 0.9999570131939592, "No": 4.2899731104942724e-05}, "ground_truth": 1}, {"key": "32530125", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998734658461215, "res": {"Yes": 0.9998734658461215, "No": 0.00012646689293236243}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 3.1262354778352398e-06}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.2364707614836196e-05, "res": {"No": 0.9999675027220479, "Yes": 3.2364707614836196e-05}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999652379302147, "res": {"Yes": 0.9999652379302147, "No": 3.473638018163765e-05}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.385403298746666e-07}, "ground_truth": 1}, {"key": "35010363", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990803878626551, "res": {"Yes": 0.9990803878626551, "No": 0.0009196227742451998}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.953337714749812e-06}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9679461475010189, "res": {"Yes": 0.9679461475010189, "No": 0.03205379800949472}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.017494722761774373, "res": {"No": 0.9825046592605908, "Yes": 0.017494722761774373}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999837713579866, "res": {"Yes": 0.999837713579866, "No": 0.00016219536378735683}, "ground_truth": 1}, {"key": "27514800", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977651729527915, "res": {"Yes": 0.9977651729527915, "No": 0.0022348302602444837}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999033752944676, "res": {"Yes": 0.9999033752944676, "No": 9.656762672343762e-05}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9924143578141027, "res": {"Yes": 0.9924143578141027, "No": 0.007585361556604691}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.8036286601522169e-06}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999888397127765, "res": {"Yes": 0.9999888397127765, "No": 1.1047449180970782e-05}, "ground_truth": 1}, {"key": "25725840", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 2.0710206594688063e-06}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999914621674475, "res": {"Yes": 0.9999914621674475, "No": 8.511145653984805e-06}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999736434039612, "res": {"Yes": 0.999736434039612, "No": 0.0002634281243651251}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998553493529139, "res": {"Yes": 0.9998553493529139, "No": 0.00014451257933201342}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989106200056785, "res": {"Yes": 0.9989106200056785, "No": 0.0010893385519632798}, "ground_truth": 1}, {"key": "38327225", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999820452021894, "res": {"Yes": 0.9999820452021894, "No": 1.7845057122331712e-05}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999416367032035, "res": {"Yes": 0.9999416367032035, "No": 5.801818860158108e-05}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9558903491986002, "res": {"Yes": 0.9558903491986002, "No": 0.044109425023955345}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 4.153068912611671e-06}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996552993429273, "res": {"Yes": 0.9996552993429273, "No": 0.0003446445368695292}, "ground_truth": 1}, {"key": "11991724", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984477772458319, "res": {"Yes": 0.9984477772458319, "No": 0.0015521798208237322}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994841543941172, "res": {"Yes": 0.9994841543941172, "No": 0.0005158081829827934}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0003097955482002566, "res": {"No": 0.9996900881026163, "Yes": 0.0003097955482002566}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999726283256111, "res": {"Yes": 0.9999726283256111, "No": 2.7292134795823723e-05}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999827604126034, "res": {"Yes": 0.9999827604126034, "No": 1.7117640808298693e-05}, "ground_truth": 1}, {"key": "32217545", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999495037169802, "res": {"Yes": 0.9999495037169802, "No": 5.042320863952265e-05}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997127227020275, "res": {"Yes": 0.9997127227020275, "No": 0.0002871869961053522}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.1211327533105497, "res": {"No": 0.8788671503303085, "Yes": 0.1211327533105497}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995444054947095, "res": {"Yes": 0.9995444054947095, "No": 0.0004555217922171502}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995177273508576, "res": {"Yes": 0.9995177273508576, "No": 0.0004822091581341018}, "ground_truth": 1}, {"key": "12731847", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998654802485932, "res": {"Yes": 0.9998654802485932, "No": 0.0001344863299697903}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.45211621019214515, "res": {"No": 0.5478835032192092, "Yes": 0.45211621019214515}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.5654095217022377e-05, "res": {"No": 0.9999841908319662, "Yes": 1.5654095217022377e-05}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6808270430860952, "res": {"Yes": 0.6808270430860952, "No": 0.3191726819283965}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2964265839921271, "res": {"No": 0.7035731343340621, "Yes": 0.2964265839921271}, "ground_truth": 1}, {"key": "36827234", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996635216215513, "res": {"Yes": 0.9996635216215513, "No": 0.00033646066843674884}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.21049790045393763, "res": {"No": 0.7895015200151079, "Yes": 0.21049790045393763}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9870241092979752, "res": {"Yes": 0.9870241092979752, "No": 0.012975457375905337}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.2382177041728124, "res": {"No": 0.7617820940539302, "Yes": 0.2382177041728124}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.17559927965184233, "res": {"No": 0.8244002720238777, "Yes": 0.17559927965184233}, "ground_truth": 1}, {"key": "29111539", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5235982432968779, "res": {"Yes": 0.5235982432968779, "No": 0.47640166811799306}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6828049080262718, "res": {"Yes": 0.6828049080262718, "No": 0.3171947628242771}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9295723196055453, "res": {"Yes": 0.9295723196055453, "No": 0.07042761036647403}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996697144562833, "res": {"Yes": 0.9996697144562833, "No": 0.0003302208119989637}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999962302846054, "res": {"Yes": 0.9999962302846054, "No": 3.6626816045145038e-06}, "ground_truth": 1}, {"key": "37763052", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.004568031092784478, "res": {"No": 0.9954318851213466, "Yes": 0.004568031092784478}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999900317366834, "res": {"Yes": 0.9999900317366834, "No": 9.883192212088673e-06}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0007486032657700913, "res": {"No": 0.9992512860262887, "Yes": 0.0007486032657700913}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999100500717374, "res": {"Yes": 0.9999100500717374, "No": 8.986850603995861e-05}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998642883696566, "res": {"Yes": 0.9998642883696566, "No": 0.00013568995736707872}, "ground_truth": 1}, {"key": "30682335", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 1.0025508057474512e-06}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999167249015635, "res": {"Yes": 0.9999167249015635, "No": 8.319336174824384e-05}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0014328970141858671, "res": {"No": 0.9985671069729322, "Yes": 0.0014328970141858671}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999505764963816, "res": {"Yes": 0.9999505764963816, "No": 4.93744734057432e-05}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999458085984071, "res": {"Yes": 0.9999458085984071, "No": 5.412328814217444e-05}, "ground_truth": 1}, {"key": "12261276", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 3.4564627652243086e-08}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999906277489198, "res": {"Yes": 0.9999906277489198, "No": 9.284501995292754e-06}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00034533638050351787, "res": {"No": 0.9996543460520952, "Yes": 0.00034533638050351787}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 7.724109630584207e-06}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999511724841019, "res": {"Yes": 0.9999511724841019, "No": 4.87100345912153e-05}, "ground_truth": 1}, {"key": "36912979", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999567993691609, "res": {"Yes": 0.999567993691609, "No": 0.0004318839673047561}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.8885826950480372e-07}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9981520815077223, "res": {"Yes": 0.9981520815077223, "No": 0.0018478730574095286}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998372368473594, "res": {"Yes": 0.9998372368473594, "No": 0.00016268902523654355}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999496229129681, "res": {"Yes": 0.9999496229129681, "No": 5.029969723809778e-05}, "ground_truth": 1}, {"key": "30205259", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998932440088264, "res": {"Yes": 0.9998932440088264, "No": 0.00010664713254500222}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9860187217339264, "res": {"Yes": 0.9860187217339264, "No": 0.013981084081657567}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998813249924359, "res": {"Yes": 0.9998813249924359, "No": 0.00011857522365645045}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 9.249419048203565e-08, "res": {"No": 0.9999998063873687, "Yes": 9.249419048203565e-08}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999741779333953, "res": {"Yes": 0.9999741779333953, "No": 2.5683366333002932e-05}, "ground_truth": 1}, {"key": "39458032", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986091870780064, "res": {"Yes": 0.9986091870780064, "No": 0.0013908099551463826}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963613059500318, "res": {"Yes": 0.9963613059500318, "No": 0.00363861450604125}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9864966848915244, "res": {"Yes": 0.9864966848915244, "No": 0.013503207893832006}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970312317338057, "res": {"Yes": 0.9970312317338057, "No": 0.0029687226584767085}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 4.485825596082301e-06}, "ground_truth": 1}, {"key": "35116452", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999866940725246, "res": {"Yes": 0.9999866940725246, "No": 1.3188688863560634e-05}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995348808814496, "res": {"Yes": 0.9995348808814496, "No": 0.0004649749615294039}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.958650860118169, "res": {"Yes": 0.958650860118169, "No": 0.04134901825979378}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.63624281748899, "res": {"Yes": 0.63624281748899, "No": 0.3637558784280772}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968116924816877, "res": {"Yes": 0.9968116924816877, "No": 0.003188235340642777}, "ground_truth": 1}, {"key": "40107476", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.990141840384953, "res": {"Yes": 0.990141840384953, "No": 0.009858064774163063}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6861572020510527, "res": {"Yes": 0.6861572020510527, "No": 0.31384259768980144}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.570108947758134, "res": {"Yes": 0.570108947758134, "No": 0.42988632315537184}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9876130275284978, "res": {"Yes": 0.9876130275284978, "No": 0.012386811018062199}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7624473668233267, "res": {"Yes": 0.7624473668233267, "No": 0.23755238771000392}, "ground_truth": 1}, {"key": "39501049", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9541735861059022, "res": {"Yes": 0.9541735861059022, "No": 0.04582628702494622}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.897708081328815, "res": {"Yes": 0.897708081328815, "No": 0.10229172488102023}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00035536906102017786, "res": {"No": 0.9996441017867995, "Yes": 0.00035536906102017786}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8720122648300459, "res": {"Yes": 0.8720122648300459, "No": 0.12798775291394274}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998993227682926, "res": {"Yes": 0.9998993227682926, "No": 0.00010060606639294356}, "ground_truth": 1}, {"key": "39642178", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.45274157087794364, "res": {"No": 0.5472575754038793, "Yes": 0.45274157087794364}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9885866194975844, "res": {"Yes": 0.9885866194975844, "No": 0.011413155473631986}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997597774080685, "res": {"Yes": 0.9997597774080685, "No": 0.00024016628989176786}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 1.3820539247667451e-08}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 1.8005780350829554e-08}, "ground_truth": 1}, {"key": "38024796", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 3.88122504460047e-09}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 2.4009626088296658e-08}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9973190362195412, "res": {"Yes": 0.9973190362195412, "No": 0.0026809904013087955}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999462600850677, "res": {"Yes": 0.999462600850677, "No": 0.0005373148083153691}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999680987188567, "res": {"Yes": 0.9999680987188567, "No": 3.1825681492596114e-05}, "ground_truth": 1}, {"key": "36652079", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969513675463925, "res": {"Yes": 0.9969513675463925, "No": 0.0030486573451215682}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.05617502589166702, "res": {"No": 0.9438248059286621, "Yes": 0.05617502589166702}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9947621053605857, "res": {"Yes": 0.9947621053605857, "No": 0.0052378247022458494}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.6577340685876044e-07}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 5.589980713300145e-09}, "ground_truth": 1}, {"key": "32193402", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 8.231364830335155e-09}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.479043582674791e-07}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9764342971047447, "res": {"Yes": 0.9764342971047447, "No": 0.02356547174318263}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999117187730169, "res": {"Yes": 0.9999117187730169, "No": 8.819864128519033e-05}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999888238005837, "res": {"Yes": 0.999888238005837, "No": 0.00011170422813603562}, "ground_truth": 1}, {"key": "32589706", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999200623291858, "res": {"Yes": 0.9999200623291858, "No": 7.992011377610913e-05}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998568987599593, "res": {"Yes": 0.9998568987599593, "No": 0.0001430238726541727}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0019050405205495592, "res": {"No": 0.9980949583996291, "Yes": 0.0019050405205495592}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9862981102728956, "res": {"Yes": 0.9862981102728956, "No": 0.013701842036052937}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990555122096378, "res": {"Yes": 0.9990555122096378, "No": 0.0009444214879443081}, "ground_truth": 1}, {"key": "38590589", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997371490813575, "res": {"Yes": 0.9997371490813575, "No": 0.0002627285994130153}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9897921711725762, "res": {"Yes": 0.9897921711725762, "No": 0.010207744317094144}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7161685891722539, "res": {"Yes": 0.7161685891722539, "No": 0.28382977622785616}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990354080182265, "res": {"Yes": 0.9990354080182265, "No": 0.0009645142029636074}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999112420092235, "res": {"Yes": 0.9999112420092235, "No": 8.862832997960793e-05}, "ground_truth": 1}, {"key": "37045414", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993950839355403, "res": {"Yes": 0.9993950839355403, "No": 0.0006048324986513643}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9773112855711399, "res": {"Yes": 0.9773112855711399, "No": 0.022688320552676826}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.6258007521601125e-05, "res": {"No": 0.9999835948245663, "Yes": 1.6258007521601125e-05}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 3.3662742129194334e-06, "res": {"No": 0.9999963494876631, "Yes": 3.3662742129194334e-06}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0002228751073769595, "res": {"No": 0.9997769387718559, "Yes": 0.0002228751073769595}, "ground_truth": 1}, {"key": "33310095", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.005348753691886358, "res": {"No": 0.994651226900687, "Yes": 0.005348753691886358}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0030472662553800807, "res": {"No": 0.9969526712605483, "Yes": 0.0030472662553800807}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.308883053076264e-05, "res": {"No": 0.9999268564151225, "Yes": 7.308883053076264e-05}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 2.1230963338267865e-06}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.453193783902208e-07}, "ground_truth": 1}, {"key": "37934604", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.4421810099042504e-06}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998006516225288, "res": {"Yes": 0.9998006516225288, "No": 0.00019931740577760188}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9929420496519297, "res": {"Yes": 0.9929420496519297, "No": 0.007057302377498953}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997522803875712, "res": {"Yes": 0.9997522803875712, "No": 0.0002475952998918998}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995136763936793, "res": {"Yes": 0.9995136763936793, "No": 0.0004861987825967148}, "ground_truth": 1}, {"key": "39012181", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9488357369770636, "res": {"Yes": 0.9488357369770636, "No": 0.05116181121227001}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989605845668361, "res": {"Yes": 0.9989605845668361, "No": 0.0010391055333320378}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.111329921187437e-06, "res": {"No": 0.9999957534720165, "Yes": 4.111329921187437e-06}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979799036466708, "res": {"Yes": 0.9979799036466708, "No": 0.0020201053812381456}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998916945254614, "res": {"Yes": 0.9998916945254614, "No": 0.00010819634198775104}, "ground_truth": 1}, {"key": "40221674", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976707189949168, "res": {"Yes": 0.9976707189949168, "No": 0.0023292885532119473}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9906521325169629, "res": {"Yes": 0.9906521325169629, "No": 0.009347755668025204}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6322268179874265, "res": {"Yes": 0.6322268179874265, "No": 0.367772992265067}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999100500717374, "res": {"Yes": 0.9999100500717374, "No": 8.989106586611443e-05}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997703840457793, "res": {"Yes": 0.9997703840457793, "No": 0.000229574950518486}, "ground_truth": 1}, {"key": "36884862", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9927233701072345, "res": {"Yes": 0.9927233701072345, "No": 0.007276611343023524}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9867716953573624, "res": {"Yes": 0.9867716953573624, "No": 0.013228244506794055}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.1740079832626975, "res": {"No": 0.8259918106315143, "Yes": 0.1740079832626975}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9943016578933335, "res": {"Yes": 0.9943016578933335, "No": 0.005698300537006312}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980038865894997, "res": {"Yes": 0.9980038865894997, "No": 0.0019960401221846068}, "ground_truth": 1}, {"key": "39054429", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998119416517203, "res": {"Yes": 0.998119416517203, "No": 0.001880581013786354}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 3.389353034721762e-06}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0007514822263117759, "res": {"No": 0.9992483081919458, "Yes": 0.0007514822263117759}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997445341369622, "res": {"Yes": 0.9997445341369622, "No": 0.0002554177801279032}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983110121480665, "res": {"Yes": 0.9983110121480665, "No": 0.0016889561560823555}, "ground_truth": 1}, {"key": "36753964", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993173454147387, "res": {"Yes": 0.9993173454147387, "No": 0.0006826554440418053}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996078921243943, "res": {"Yes": 0.9996078921243943, "No": 0.00039207923833161766}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0031631402336338133, "res": {"No": 0.9968365579604428, "Yes": 0.0031631402336338133}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950193353497914, "res": {"Yes": 0.9950193353497914, "No": 0.004980549762311972}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974006158648827, "res": {"Yes": 0.9974006158648827, "No": 0.002599353028790145}, "ground_truth": 1}, {"key": "37612459", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998300858170501, "res": {"Yes": 0.9998300858170501, "No": 0.00016974589066118893}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9924521710101222, "res": {"Yes": 0.9924521710101222, "No": 0.007547578467110695}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4491937544195568, "res": {"No": 0.5508047601044713, "Yes": 0.4491937544195568}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.1064388113776015, "res": {"No": 0.8935569752923795, "Yes": 0.1064388113776015}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.31893524198086837, "res": {"No": 0.6810554307748207, "Yes": 0.31893524198086837}, "ground_truth": 1}, {"key": "36805789", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.1960944986445944, "res": {"No": 0.8038991765240273, "Yes": 0.1960944986445944}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.1845760878950683, "res": {"No": 0.8154231483905544, "Yes": 0.1845760878950683}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9848159126371513, "res": {"Yes": 0.9848159126371513, "No": 0.015184024857967902}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7899754815621302, "res": {"Yes": 0.7899754815621302, "No": 0.2100241195205964}, "ground_truth": 1}, {"key": "12757394", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989530898931649, "res": {"Yes": 0.9989530898931649, "No": 0.0010467642914433722}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.43613013735387873, "res": {"No": 0.5638692390615399, "Yes": 0.43613013735387873}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9558123641540063, "res": {"Yes": 0.9558123641540063, "No": 0.04418666200638487}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.017255057328662327, "res": {"No": 0.9827447577111665, "Yes": 0.017255057328662327}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30731564499382175, "res": {"No": 0.6926838630166079, "Yes": 0.30731564499382175}, "ground_truth": 1}, {"key": "32192542", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994479417432471, "res": {"Yes": 0.994479417432471, "No": 0.005520554339018627}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999502188991202, "res": {"Yes": 0.9999502188991202, "No": 4.9685471079677074e-05}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9988094105147189, "res": {"Yes": 0.9988094105147189, "No": 0.0011905835665532924}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9760878757431676, "res": {"Yes": 0.9760878757431676, "No": 0.023912116069141665}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.977894467154448e-07}, "ground_truth": 1}, {"key": "34856060", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 3.106726370958067e-06}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998473675595163, "res": {"Yes": 0.9998473675595163, "No": 0.00015257870449482823}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9079290756039228, "res": {"Yes": 0.9079290756039228, "No": 0.09207078459463998}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.08103061765429906, "res": {"No": 0.9189686089908435, "Yes": 0.08103061765429906}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.964637973526007, "res": {"Yes": 0.964637973526007, "No": 0.035361896804153845}, "ground_truth": 1}, {"key": "36083416", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997996981830881, "res": {"Yes": 0.9997996981830881, "No": 0.00020025460019900266}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.008650881981993534, "res": {"No": 0.9913490764563568, "Yes": 0.008650881981993534}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.932084241971399, "res": {"Yes": 0.932084241971399, "No": 0.06791545202389156}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.4981364716707257, "res": {"No": 0.5018634240258372, "Yes": 0.4981364716707257}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 7.784151003815134e-08}, "ground_truth": 1}, {"key": "33839050", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999533180476362, "res": {"Yes": 0.9999533180476362, "No": 4.660256385492953e-05}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.998768505312711, "res": {"Yes": 0.998768505312711, "No": 0.0012315121185167593}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.8696626088533148e-05, "res": {"No": 0.9999711979237877, "Yes": 2.8696626088533148e-05}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999727475263555, "res": {"Yes": 0.9999727475263555, "No": 2.7154738402955633e-05}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999913429644723, "res": {"Yes": 0.9999913429644723, "No": 8.598343410033643e-06}, "ground_truth": 1}, {"key": "18464690", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999664299234876, "res": {"Yes": 0.9999664299234876, "No": 3.3516355982473204e-05}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999602894867123, "res": {"Yes": 0.999602894867123, "No": 0.00039701225846825523}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.24205020879155684, "res": {"No": 0.7579496957408235, "Yes": 0.24205020879155684}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995565587991251, "res": {"Yes": 0.9995565587991251, "No": 0.000443208164941037}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998473675595163, "res": {"Yes": 0.9998473675595163, "No": 0.00015252271838435817}, "ground_truth": 1}, {"key": "39212665", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998781068795701, "res": {"Yes": 0.9998781068795701, "No": 0.00012181951425053261}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997143910639997, "res": {"Yes": 0.9997143910639997, "No": 0.0002855061465580379}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999158905423988, "res": {"Yes": 0.9999158905423988, "No": 8.407830424084076e-05}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9922399342889706, "res": {"Yes": 0.9922399342889706, "No": 0.00775990878008335}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995869316962124, "res": {"Yes": 0.9995869316962124, "No": 0.00041304773110430827}, "ground_truth": 1}, {"key": "40094011", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998996803423534, "res": {"Yes": 0.9998996803423534, "No": 0.0001001940251833567}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999792070690296, "res": {"Yes": 0.999792070690296, "No": 0.00020779157996895665}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8010950698543606, "res": {"Yes": 0.8010950698543606, "No": 0.19890463305596662}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.005417954138553711, "res": {"No": 0.9945818882020219, "Yes": 0.005417954138553711}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.962810120114483, "res": {"Yes": 0.962810120114483, "No": 0.0371897795028231}, "ground_truth": 1}, {"key": "36036272", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99966459043237, "res": {"Yes": 0.99966459043237, "No": 0.00033532444576729023}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9929647368496484, "res": {"Yes": 0.9929647368496484, "No": 0.007035260921843604}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9581273755359684, "res": {"Yes": 0.9581273755359684, "No": 0.041872319618333556}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998825168816821, "res": {"Yes": 0.9998825168816821, "No": 0.00011744681090692817}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998406931906776, "res": {"Yes": 0.9998406931906776, "No": 0.000159254660282474}, "ground_truth": 1}, {"key": "30681904", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989118108077384, "res": {"Yes": 0.9989118108077384, "No": 0.0010881814179601822}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999325777929792, "res": {"Yes": 0.9999325777929792, "No": 6.729187881444154e-05}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.8415439759888324e-06, "res": {"No": 0.9999967070975216, "Yes": 2.8415439759888324e-06}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.460039734788282e-07}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.195729617880779e-06}, "ground_truth": 1}, {"key": "27834240", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.17976498735523e-07}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.214907314084051e-06}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9989027683992808, "res": {"Yes": 0.9989027683992808, "No": 0.001097145579023667}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997097471315166, "res": {"Yes": 0.9997097471315166, "No": 0.0002901382570917545}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999939654258081, "res": {"Yes": 0.9999939654258081, "No": 5.930067275962636e-06}, "ground_truth": 1}, {"key": "35025075", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 6.187706852012337e-06}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 4.49698223767522e-06}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7663796004857477, "res": {"Yes": 0.7663796004857477, "No": 0.2336202605459219}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9935900831295132, "res": {"Yes": 0.9935900831295132, "No": 0.006409833393762891}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997347656408535, "res": {"Yes": 0.9997347656408535, "No": 0.00026511734994955446}, "ground_truth": 1}, {"key": "33316985", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995182039610044, "res": {"Yes": 0.9995182039610044, "No": 0.0004817404858541245}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9702094528705748, "res": {"Yes": 0.9702094528705748, "No": 0.029790506799445317}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00024109873971986446, "res": {"No": 0.9997588240176042, "Yes": 0.00024109873971986446}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995361915424121, "res": {"Yes": 0.9995361915424121, "No": 0.0004637729302416466}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987469818835858, "res": {"Yes": 0.9987469818835858, "No": 0.0012529848426361407}, "ground_truth": 1}, {"key": "17037056", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999888397127765, "res": {"Yes": 0.9999888397127765, "No": 1.104824462594748e-05}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997172511609895, "res": {"Yes": 0.9997172511609895, "No": 0.0002826472229330095}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.23022667774195493, "res": {"No": 0.7697730406384782, "Yes": 0.23022667774195493}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999819260003368, "res": {"Yes": 0.9999819260003368, "No": 1.8022258151705956e-05}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999871708812939, "res": {"Yes": 0.9999871708812939, "No": 1.2781023834218853e-05}, "ground_truth": 1}, {"key": "34050457", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999779923581718, "res": {"Yes": 0.9999779923581718, "No": 2.1976606747524393e-05}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8295472408323382, "res": {"Yes": 0.8295472408323382, "No": 0.17045236593029744}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5431859871078397, "res": {"Yes": 0.5431859871078397, "No": 0.4568138369740701}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971011472114624, "res": {"Yes": 0.9971011472114624, "No": 0.002898828468469443}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999783493530909, "res": {"Yes": 0.999783493530909, "No": 0.00021643268124620907}, "ground_truth": 1}, {"key": "34713745", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999814491960682, "res": {"Yes": 0.9999814491960682, "No": 1.8442182315697543e-05}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999779679844051, "res": {"Yes": 0.999779679844051, "No": 0.00022024455838306675}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7375121325413141, "res": {"Yes": 0.7375121325413141, "No": 0.26248700577112505}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.824494357951362, "res": {"Yes": 0.824494357951362, "No": 0.1755054820169939}, "ground_truth": 1}, {"key": "40856210", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.030528429950717667, "res": {"No": 0.9694706132516814, "Yes": 0.030528429950717667}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9267695610735496, "res": {"Yes": 0.9267695610735496, "No": 0.07322989791079884}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.000570524411454016, "res": {"No": 0.9994293760977794, "Yes": 0.000570524411454016}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.922046413268724e-06}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999127915188807, "res": {"Yes": 0.9999127915188807, "No": 8.71635778073357e-05}, "ground_truth": 1}, {"key": "40848302", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999553444170902, "res": {"Yes": 0.9999553444170902, "No": 4.4565167581720776e-05}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999917005724405, "res": {"Yes": 0.9999917005724405, "No": 8.212608193448668e-06}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0002545078188624104, "res": {"No": 0.9997452491745036, "Yes": 0.0002545078188624104}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999161289333746, "res": {"Yes": 0.9999161289333746, "No": 8.379619141645839e-05}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999750123381969, "res": {"Yes": 0.9999750123381969, "No": 2.495024290856276e-05}, "ground_truth": 1}, {"key": "40636168", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999194663630074, "res": {"Yes": 0.9999194663630074, "No": 8.049318785819813e-05}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.3327912375030344, "res": {"No": 0.6672085054781803, "Yes": 0.3327912375030344}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.274039546935037e-06, "res": {"No": 0.9999965878943212, "Yes": 3.274039546935037e-06}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934498235464457, "res": {"Yes": 0.9934498235464457, "No": 0.006550158704066733}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999180360292298, "res": {"Yes": 0.9999180360292298, "No": 8.18161419067485e-05}, "ground_truth": 1}, {"key": "34423311", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990506404971501, "res": {"Yes": 0.9990506404971501, "No": 0.00094930387986967}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988835081413465, "res": {"Yes": 0.9988835081413465, "No": 0.001116490494923215}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9105517723329476, "res": {"Yes": 0.9105517723329476, "No": 0.08944807407523532}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953914796689639, "res": {"Yes": 0.9953914796689639, "No": 0.004608451862305473}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9730908209410383, "res": {"Yes": 0.9730908209410383, "No": 0.026909104295195007}, "ground_truth": 1}, {"key": "34833945", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.0727701598484416, "res": {"No": 0.927229501151471, "Yes": 0.0727701598484416}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.2994410825931299, "res": {"No": 0.7005585734988802, "Yes": 0.2994410825931299}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.492596864431423e-05, "res": {"No": 0.999984906043415, "Yes": 1.492596864431423e-05}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8995479001824438, "res": {"Yes": 0.8995479001824438, "No": 0.1004521082387174}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9895449856117106, "res": {"Yes": 0.9895449856117106, "No": 0.010454936706683191}, "ground_truth": 1}, {"key": "21272328", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996778176818734, "res": {"Yes": 0.9996778176818734, "No": 0.00032180187397750466}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8087192382963755, "res": {"Yes": 0.8087192382963755, "No": 0.19128037411192378}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0008892389263593792, "res": {"No": 0.9991097789320968, "Yes": 0.0008892389263593792}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9648872942378056, "res": {"Yes": 0.9648872942378056, "No": 0.035112116803640195}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9946717482591673, "res": {"Yes": 0.9946717482591673, "No": 0.005328030613412244}, "ground_truth": 1}, {"key": "38648957", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999846676395419, "res": {"Yes": 0.9999846676395419, "No": 1.5273809404550878e-05}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9964531579571114, "res": {"Yes": 0.9964531579571114, "No": 0.003546798192758393}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.318233464651155, "res": {"No": 0.6817663807788692, "Yes": 0.318233464651155}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993471080767461, "res": {"Yes": 0.9993471080767461, "No": 0.0006527743809191474}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999949190499081, "res": {"Yes": 0.9999949190499081, "No": 4.996367048124466e-06}, "ground_truth": 1}, {"key": "24942981", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998125696772581, "res": {"Yes": 0.9998125696772581, "No": 0.00018737792336164527}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999844292352256, "res": {"Yes": 0.9999844292352256, "No": 1.5542070268295246e-05}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.3187822117986228e-08, "res": {"No": 1.0, "Yes": 1.3187822117986228e-08}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9767369585669299, "res": {"Yes": 0.9767369585669299, "No": 0.02326288748700996}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998473675595163, "res": {"Yes": 0.9998473675595163, "No": 0.00015259670984425313}, "ground_truth": 1}, {"key": "35882366", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.03333951329487991, "res": {"No": 0.9666599789762924, "Yes": 0.03333951329487991}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.49866248242997907, "res": {"No": 0.5013365735315557, "Yes": 0.49866248242997907}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6257472145866105, "res": {"Yes": 0.6257472145866105, "No": 0.3742527266765708}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972385257041421, "res": {"Yes": 0.9972385257041421, "No": 0.0027615070932616123}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999983475621529, "res": {"Yes": 0.999983475621529, "No": 1.641322739079733e-05}, "ground_truth": 1}, {"key": "40559523", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998776374469079, "res": {"Yes": 0.9998776374469079, "No": 0.00012234617883372033}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999642843338196, "res": {"Yes": 0.9999642843338196, "No": 3.5622004753673596e-05}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.4740556654776427e-06, "res": {"No": 0.999996945503965, "Yes": 1.4740556654776427e-06}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994165168925134, "res": {"Yes": 0.9994165168925134, "No": 0.000583447549347205}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.303685079659194e-07}, "ground_truth": 1}, {"key": "24632722", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 6.533328631921651e-06}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.4794302459849032e-06}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992581873791276, "res": {"Yes": 0.9992581873791276, "No": 0.0007417239641369102}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999953079650794, "res": {"Yes": 0.999953079650794, "No": 4.682281377465601e-05}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996630449321146, "res": {"Yes": 0.9996630449321146, "No": 0.00033679436726786934}, "ground_truth": 1}, {"key": "36002759", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999883629027115, "res": {"Yes": 0.9999883629027115, "No": 1.1579746236569456e-05}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999013490263273, "res": {"Yes": 0.9999013490263273, "No": 9.855438797390958e-05}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7924838911990514, "res": {"Yes": 0.7924838911990514, "No": 0.20751478268366005}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982523292581809, "res": {"Yes": 0.9982523292581809, "No": 0.001747652884726275}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999868132749457, "res": {"Yes": 0.9999868132749457, "No": 1.3080316695946604e-05}, "ground_truth": 1}, {"key": "29508534", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999869324773808, "res": {"Yes": 0.9999869324773808, "No": 1.2972186402742348e-05}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999922965856715, "res": {"Yes": 0.9999922965856715, "No": 7.662042230653106e-06}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9966004170122417, "res": {"Yes": 0.9966004170122417, "No": 0.0033992923546757496}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.1599856692629766e-07}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.1027561654433122e-07}, "ground_truth": 1}, {"key": "15631612", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 1.1240123556164038e-08}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.910844644714241e-06}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9941484738596817, "res": {"Yes": 0.9941484738596817, "No": 0.0058513170870137365}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.112887778145167e-07}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.6474384714140566e-06}, "ground_truth": 1}, {"key": "40731892", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1386775645627834e-06}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.278278372326754e-07}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9751869010212356, "res": {"Yes": 0.9751869010212356, "No": 0.024813014133594477}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9512936917315559, "res": {"Yes": 0.9512936917315559, "No": 0.048706065908861264}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.827974010601315e-06}, "ground_truth": 1}, {"key": "35971910", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9442558207427929, "res": {"Yes": 0.9442558207427929, "No": 0.05574395527951098}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989317970354062, "res": {"Yes": 0.9989317970354062, "No": 0.0010681041761388416}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.005133801813626753, "res": {"No": 0.9948661614266227, "Yes": 0.005133801813626753}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999628539429318, "res": {"Yes": 0.9999628539429318, "No": 3.707854713271009e-05}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 4.073271797499077e-06}, "ground_truth": 1}, {"key": "34428424", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999914621674475, "res": {"Yes": 0.9999914621674475, "No": 8.400536374890205e-06}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 3.064240224197599e-07, "res": {"No": 0.9999993295729247, "Yes": 3.064240224197599e-07}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9883480835459401, "res": {"Yes": 0.9883480835459401, "No": 0.011651841998610689}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6684305012991009, "res": {"Yes": 0.6684305012991009, "No": 0.3315693137370236}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999925902855329, "res": {"Yes": 0.999925902855329, "No": 7.402253382245105e-05}, "ground_truth": 1}, {"key": "36971005", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999440206399028, "res": {"Yes": 0.9999440206399028, "No": 5.584335605749482e-05}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.002155075976783236, "res": {"No": 0.9978446884267981, "Yes": 0.002155075976783236}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.2620682688017014e-06}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999939654258081, "res": {"Yes": 0.9999939654258081, "No": 5.927559387960613e-06}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999955150656573, "res": {"Yes": 0.9999955150656573, "No": 4.4075305117929685e-06}, "ground_truth": 1}, {"key": "34649067", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.3523749035402904e-07}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 2.4216902431551467e-08}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9773597887791688, "res": {"Yes": 0.9773597887791688, "No": 0.022639826253318285}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997516845353897, "res": {"Yes": 0.9997516845353897, "No": 0.0002481703878317134}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.8849153495840625e-06}, "ground_truth": 1}, {"key": "37355154", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 1.0090351305536967e-06}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.832534563759125e-07}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00022139259156846018, "res": {"No": 0.9997784880673918, "Yes": 0.00022139259156846018}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998916945254614, "res": {"Yes": 0.9998916945254614, "No": 0.000108226857658699}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999984906043415, "res": {"Yes": 0.999984906043415, "No": 1.4987995159368875e-05}, "ground_truth": 1}, {"key": "38674697", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 5.2455209388968736e-06}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.99986452674733, "res": {"Yes": 0.99986452674733, "No": 0.00013540246396203677}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8152857285538195, "res": {"Yes": 0.8152857285538195, "No": 0.1847098063115163}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991177474738177, "res": {"Yes": 0.9991177474738177, "No": 0.0008820668598605222}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995874083093709, "res": {"Yes": 0.9995874083093709, "No": 0.00041249210160380337}, "ground_truth": 1}, {"key": "40525767", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9936324527011933, "res": {"Yes": 0.9936324527011933, "No": 0.006367215704543924}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.996764548091723, "res": {"Yes": 0.996764548091723, "No": 0.0032352770929461785}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9832356846432032, "res": {"Yes": 0.9832356846432032, "No": 0.016764096375160378}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.801062634171647, "res": {"Yes": 0.801062634171647, "No": 0.19893679853564072}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997467984109344, "res": {"Yes": 0.9997467984109344, "No": 0.000253108830917636}, "ground_truth": 1}, {"key": "27165110", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993779438272142, "res": {"Yes": 0.9993779438272142, "No": 0.0006219820028303821}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9777902033120747, "res": {"Yes": 0.9777902033120747, "No": 0.022209672206712484}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998565412111963, "res": {"Yes": 0.9998565412111963, "No": 0.00014326191923526768}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999882437011058, "res": {"Yes": 0.9999882437011058, "No": 1.169833539750442e-05}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999387759842665, "res": {"Yes": 0.9999387759842665, "No": 6.11216755120047e-05}, "ground_truth": 1}, {"key": "35497491", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.851044324893897e-06}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999239957484695, "res": {"Yes": 0.9999239957484695, "No": 7.587297481588509e-05}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9086934721778425, "res": {"Yes": 0.9086934721778425, "No": 0.09130641782866125}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996037216190272, "res": {"Yes": 0.9996037216190272, "No": 0.00039607934524509215}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996784135000271, "res": {"Yes": 0.9996784135000271, "No": 0.00032156302476981757}, "ground_truth": 1}, {"key": "40690716", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975581280316757, "res": {"Yes": 0.9975581280316757, "No": 0.002441767296531142}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9228013589924472, "res": {"Yes": 0.9228013589924472, "No": 0.0771985426284048}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0003065492899560943, "res": {"No": 0.9996930672734899, "Yes": 0.0003065492899560943}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9850963532852579, "res": {"Yes": 0.9850963532852579, "No": 0.014903450733378685}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999024217570665, "res": {"Yes": 0.9999024217570665, "No": 9.752000709860377e-05}, "ground_truth": 1}, {"key": "34835193", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980005630424905, "res": {"Yes": 0.9980005630424905, "No": 0.001999371495619517}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997716949855572, "res": {"Yes": 0.9997716949855572, "No": 0.00022819067149705668}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999918197754583, "res": {"Yes": 0.9999918197754583, "No": 8.144220690373476e-06}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999739395305799, "res": {"Yes": 0.9999739395305799, "No": 2.6001300551039074e-05}, "ground_truth": 1}, {"key": "39471712", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997390558518481, "res": {"Yes": 0.9997390558518481, "No": 0.00026082183715356804}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993819942442721, "res": {"Yes": 0.9993819942442721, "No": 0.0006178369316308447}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.021726938126568306, "res": {"No": 0.9782727787104214, "Yes": 0.021726938126568306}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7443632455723292, "res": {"Yes": 0.7443632455723292, "No": 0.25563585207015416}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4739707348575306, "res": {"No": 0.5260285448190718, "Yes": 0.4739707348575306}, "ground_truth": 1}, {"key": "39115192", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9311805735128605, "res": {"Yes": 0.9311805735128605, "No": 0.06881928968579881}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914220751504269, "res": {"Yes": 0.9914220751504269, "No": 0.00857771000749046}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9865817166782658, "res": {"Yes": 0.9865817166782658, "No": 0.013418150146132506}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999766811478886, "res": {"Yes": 0.9999766811478886, "No": 2.3221476413882926e-05}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999980257181892, "res": {"Yes": 0.999980257181892, "No": 1.967849781090901e-05}, "ground_truth": 1}, {"key": "23520673", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999955150656573, "res": {"Yes": 0.9999955150656573, "No": 4.426098773202676e-06}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 3.3145655457638165e-06}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9924765960548318, "res": {"Yes": 0.9924765960548318, "No": 0.007523272972523981}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986192905575675, "res": {"Yes": 0.9986192905575675, "No": 0.0013806953912893119}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.907066676843223e-06}, "ground_truth": 1}, {"key": "35764233", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999402063417121, "res": {"Yes": 0.9999402063417121, "No": 5.967508269405478e-05}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999740587314805, "res": {"Yes": 0.9999740587314805, "No": 2.585589336039514e-05}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999909096527971, "res": {"Yes": 0.999909096527971, "No": 9.06617508837663e-05}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.018976669032815308, "res": {"No": 0.9810224479076854, "Yes": 0.018976669032815308}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999758467426946, "res": {"Yes": 0.9999758467426946, "No": 2.409221015019973e-05}, "ground_truth": 1}, {"key": "35228910", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9843227044671775, "res": {"Yes": 0.9843227044671775, "No": 0.01567718199371494}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973632603207305, "res": {"Yes": 0.9973632603207305, "No": 0.0026365540461509656}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8969183066855283, "res": {"Yes": 0.8969183066855283, "No": 0.10308115380130439}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999876476902904, "res": {"Yes": 0.9999876476902904, "No": 1.2304112523067459e-05}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986705244367036, "res": {"Yes": 0.9986705244367036, "No": 0.0013293907824466755}, "ground_truth": 1}, {"key": "36795599", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997942159164498, "res": {"Yes": 0.9997942159164498, "No": 0.00020568616946549167}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926243307394578, "res": {"Yes": 0.9926243307394578, "No": 0.007375538223216021}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.3142740565844204e-05, "res": {"No": 0.9999868132749457, "Yes": 1.3142740565844204e-05}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999821644040562, "res": {"Yes": 0.9999821644040562, "No": 1.772897058846492e-05}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999890781166442, "res": {"Yes": 0.9999890781166442, "No": 1.088220142660188e-05}, "ground_truth": 1}, {"key": "38641949", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999475965531086, "res": {"Yes": 0.9999475965531086, "No": 5.2323288457998136e-05}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999742971333243, "res": {"Yes": 0.9999742971333243, "No": 2.5652231296858017e-05}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992717483040796, "res": {"Yes": 0.9992717483040796, "No": 0.0007281343355035297}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996666162047319, "res": {"Yes": 0.9996666162047319, "No": 0.00033326186354640664}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998062530813588, "res": {"Yes": 0.9998062530813588, "No": 0.00019372130576035423}, "ground_truth": 1}, {"key": "29968443", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994437843453111, "res": {"Yes": 0.9994437843453111, "No": 0.0005561714210448991}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999604699583327, "res": {"Yes": 0.9999604699583327, "No": 3.946094931523143e-05}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999471890129134, "res": {"Yes": 0.999471890129134, "No": 0.0005281062835039386}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952131669784173, "res": {"Yes": 0.9952131669784173, "No": 0.0047868055013044475}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998539191008537, "res": {"Yes": 0.9998539191008537, "No": 0.00014602744127189106}, "ground_truth": 1}, {"key": "21268042", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995461415643496, "res": {"Yes": 0.995461415643496, "No": 0.004538551820877919}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994522394456187, "res": {"Yes": 0.9994522394456187, "No": 0.0005476433522235263}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.38953006420020236, "res": {"No": 0.61046955843984, "Yes": 0.38953006420020236}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.978717018322202, "res": {"Yes": 0.978717018322202, "No": 0.021282898842172698}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999792070690296, "res": {"Yes": 0.999792070690296, "No": 0.0002077955280464827}, "ground_truth": 1}, {"key": "26808572", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987953759427709, "res": {"Yes": 0.9987953759427709, "No": 0.0012045683403414673}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9937659392950886, "res": {"Yes": 0.9937659392950886, "No": 0.006233998738469135}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.06100421614903586, "res": {"No": 0.9389955207683514, "Yes": 0.06100421614903586}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974809089485164, "res": {"Yes": 0.9974809089485164, "No": 0.0025188891201201762}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981460207466839, "res": {"Yes": 0.9981460207466839, "No": 0.0018539058411396274}, "ground_truth": 1}, {"key": "37829390", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8531425324182935, "res": {"Yes": 0.8531425324182935, "No": 0.1468557744519419}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8742918277771173, "res": {"Yes": 0.8742918277771173, "No": 0.12570743705761064}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9952041911913407, "res": {"Yes": 0.9952041911913407, "No": 0.004795774793841382}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997745552364907, "res": {"Yes": 0.9997745552364907, "No": 0.0002253470273889096}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999907469518097, "res": {"Yes": 0.9999907469518097, "No": 9.201850065959129e-06}, "ground_truth": 1}, {"key": "35716045", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971346896599647, "res": {"Yes": 0.9971346896599647, "No": 0.0028652499214562362}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999901509395023, "res": {"Yes": 0.9999901509395023, "No": 9.714130304865523e-06}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9643299144634622, "res": {"Yes": 0.9643299144634622, "No": 0.035669881561933015}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.21622280463321794, "res": {"No": 0.78377701400423, "Yes": 0.21622280463321794}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3650125654871553, "res": {"No": 0.6349870334891857, "Yes": 0.3650125654871553}, "ground_truth": 1}, {"key": "34367070", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.867677549579163, "res": {"Yes": 0.867677549579163, "No": 0.13232212344959615}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7034910884193264, "res": {"Yes": 0.7034910884193264, "No": 0.2965084979731795}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.48347655279548035, "res": {"No": 0.5165226699006844, "Yes": 0.48347655279548035}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986737387628264, "res": {"Yes": 0.9986737387628264, "No": 0.0013262649443170492}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994264014106056, "res": {"Yes": 0.9994264014106056, "No": 0.0005735756619105485}, "ground_truth": 1}, {"key": "35239748", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.1659644284556424, "res": {"No": 0.8340351093228943, "Yes": 0.1659644284556424}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.01120505794937606, "res": {"No": 0.9887945065180493, "Yes": 0.01120505794937606}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9995283277923265, "res": {"Yes": 0.9995283277923265, "No": 0.0004715869789741398}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991639184111499, "res": {"Yes": 0.9991639184111499, "No": 0.0008359787197901687}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999837140256179, "res": {"Yes": 0.9999837140256179, "No": 1.625342340987016e-05}, "ground_truth": 1}, {"key": "40421370", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999795419732683, "res": {"Yes": 0.9999795419732683, "No": 2.0417871228519986e-05}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998414082969939, "res": {"Yes": 0.9998414082969939, "No": 0.00015854628315273324}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999155329675407, "res": {"Yes": 0.9999155329675407, "No": 8.435959527152907e-05}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999822836059372, "res": {"Yes": 0.9999822836059372, "No": 1.759670305303635e-05}, "ground_truth": 1}, {"key": "37288396", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 5.342429745267917e-06}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9578306798536727, "res": {"Yes": 0.9578306798536727, "No": 0.04216915708172659}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999290019304823, "res": {"Yes": 0.9999290019304823, "No": 7.087732444868785e-05}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967917924313822, "res": {"Yes": 0.9967917924313822, "No": 0.003208143044630087}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.46497364612540415, "res": {"No": 0.5350262488599865, "Yes": 0.46497364612540415}, "ground_truth": 1}, {"key": "38903688", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976736813829807, "res": {"Yes": 0.9976736813829807, "No": 0.002326294780761689}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.5565425784791102, "res": {"Yes": 0.5565425784791102, "No": 0.44345723856423613}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.829841048763528e-06, "res": {"No": 0.9999950382530095, "Yes": 4.829841048763528e-06}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6954407512225718, "res": {"Yes": 0.6954407512225718, "No": 0.304558193022829}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987028687030052, "res": {"Yes": 0.9987028687030052, "No": 0.0012971224083972086}, "ground_truth": 1}, {"key": "28071228", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9269580898093135, "res": {"Yes": 0.9269580898093135, "No": 0.07304111333495887}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9885550445452123, "res": {"Yes": 0.9885550445452123, "No": 0.01144483434143462}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998890723329287, "res": {"Yes": 0.9998890723329287, "No": 0.00011081892444448764}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.055550006265528e-07}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999424710798518, "res": {"Yes": 0.9999424710798518, "No": 5.750837319385204e-05}, "ground_truth": 1}, {"key": "36855834", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.326294482940572e-07}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 3.0261571823840776e-06}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9067912733150497, "res": {"Yes": 0.9067912733150497, "No": 0.09320844032164367}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6763698075020683, "res": {"Yes": 0.6763698075020683, "No": 0.32362947929563834}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9118474925247275, "res": {"Yes": 0.9118474925247275, "No": 0.08815230692787361}, "ground_truth": 1}, {"key": "40548717", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977667154989412, "res": {"Yes": 0.9977667154989412, "No": 0.0022332141686450305}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967123259748034, "res": {"Yes": 0.9967123259748034, "No": 0.0032876837154928603}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996612574862052, "res": {"Yes": 0.9996612574862052, "No": 0.0003387212703980675}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999978707566687, "res": {"Yes": 0.999978707566687, "No": 2.1225463256957132e-05}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.225956606659373e-07}, "ground_truth": 1}, {"key": "37051175", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.823262374171562e-07}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999987886094374, "res": {"Yes": 0.999987886094374, "No": 1.2024407686806918e-05}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9983118452389537, "res": {"Yes": 0.9983118452389537, "No": 0.001688126913351323}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994056794284768, "res": {"Yes": 0.9994056794284768, "No": 0.0005942045129279703}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987871681757345, "res": {"Yes": 0.9987871681757345, "No": 0.0012126963622711197}, "ground_truth": 1}, {"key": "38882119", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994866563559582, "res": {"Yes": 0.9994866563559582, "No": 0.0005133028172024329}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995577466228676, "res": {"Yes": 0.9995577466228676, "No": 0.00044212792264983897}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9953853384210966, "res": {"Yes": 0.9953853384210966, "No": 0.004614651170075104}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998865693437434, "res": {"Yes": 0.9998865693437434, "No": 0.00011332123326849277}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999855020530962, "res": {"Yes": 0.9999855020530962, "No": 1.4386964563558856e-05}, "ground_truth": 1}, {"key": "19485402", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999901509395023, "res": {"Yes": 0.9999901509395023, "No": 9.792331290450734e-06}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986452048646677, "res": {"Yes": 0.9986452048646677, "No": 0.001354744098068935}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8015991492235233, "res": {"Yes": 0.8015991492235233, "No": 0.1984006317326685}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992192667309422, "res": {"Yes": 0.9992192667309422, "No": 0.0007806384870405426}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2947911321563974, "res": {"No": 0.705208637665778, "Yes": 0.2947911321563974}, "ground_truth": 1}, {"key": "36060907", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996791284902776, "res": {"Yes": 0.9996791284902776, "No": 0.00032074568737477325}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9894514928706454, "res": {"Yes": 0.9894514928706454, "No": 0.010548411490686265}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.2527767309099733e-06, "res": {"No": 0.9999963494876631, "Yes": 2.2527767309099733e-06}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998695326316075, "res": {"Yes": 0.9998695326316075, "No": 0.0001304127678897446}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.977819377095793, "res": {"Yes": 0.977819377095793, "No": 0.022179684981026043}, "ground_truth": 1}, {"key": "24037309", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998086367022921, "res": {"Yes": 0.9998086367022921, "No": 0.00019122668522675762}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9873696148690201, "res": {"Yes": 0.9873696148690201, "No": 0.012630080107541332}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999893165220688, "res": {"Yes": 0.9999893165220688, "No": 1.062639029946265e-05}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999705695416093, "res": {"Yes": 0.999705695416093, "No": 0.00029423405556118287}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999648803297101, "res": {"Yes": 0.9999648803297101, "No": 3.5061180787083195e-05}, "ground_truth": 1}, {"key": "35605805", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998083983279453, "res": {"Yes": 0.9998083983279453, "No": 0.00019148147314555154}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.2722912536745196, "res": {"No": 0.7277085386119877, "Yes": 0.2722912536745196}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9686497379309857, "res": {"Yes": 0.9686497379309857, "No": 0.031350175933659206}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8513044745229859, "res": {"Yes": 0.8513044745229859, "No": 0.1486954982523777}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992165272451793, "res": {"Yes": 0.9992165272451793, "No": 0.0007833570212952613}, "ground_truth": 1}, {"key": "17706248", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999891973193493, "res": {"Yes": 0.9999891973193493, "No": 1.071589933738976e-05}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999614235510903, "res": {"Yes": 0.9999614235510903, "No": 3.852485270988718e-05}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9989238261908263, "res": {"Yes": 0.9989238261908263, "No": 0.0010761403381040334}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999845484373766, "res": {"Yes": 0.9999845484373766, "No": 1.537107594401745e-05}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999978707566687, "res": {"Yes": 0.999978707566687, "No": 2.1209104731807804e-05}, "ground_truth": 1}, {"key": "36883559", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999583243784529, "res": {"Yes": 0.9999583243784529, "No": 4.1625275128159366e-05}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.14389907401925978, "res": {"No": 0.8561004684868962, "Yes": 0.14389907401925978}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00642143285529228, "res": {"No": 0.9935785486091905, "Yes": 0.00642143285529228}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974988161857757, "res": {"Yes": 0.9974988161857757, "No": 0.0025011066126160004}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998982500468776, "res": {"Yes": 0.9998982500468776, "No": 0.00010166687711942334}, "ground_truth": 1}, {"key": "32799471", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989584447955557, "res": {"Yes": 0.9989584447955557, "No": 0.0010415603017453086}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988836235123983, "res": {"Yes": 0.9988836235123983, "No": 0.0011163158894643111}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0008738218814903062, "res": {"No": 0.999126080730072, "Yes": 0.0008738218814903062}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.891609457226533e-07}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.3443137381174973e-07}, "ground_truth": 1}, {"key": "34797243", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 5.4053691355192605e-08}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.671065270963646e-06}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.9514087701431828e-06, "res": {"No": 0.9999977799274644, "Yes": 1.9514087701431828e-06}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9311168028638261, "res": {"Yes": 0.9311168028638261, "No": 0.06888286676691552}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995668021572923, "res": {"Yes": 0.9995668021572923, "No": 0.000433084369592048}, "ground_truth": 1}, {"key": "32154876", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999620195462757, "res": {"Yes": 0.9999620195462757, "No": 3.785759264179217e-05}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999924157887603, "res": {"Yes": 0.9999924157887603, "No": 7.4485773763640906e-06}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999572515937392, "res": {"Yes": 0.9999572515937392, "No": 4.263231603089955e-05}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.7223033283311655e-07}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.872977371798919e-06}, "ground_truth": 1}, {"key": "37962274", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999975250738268, "res": {"Yes": 0.999975250738268, "No": 2.469430272112502e-05}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999676219241812, "res": {"Yes": 0.9999676219241812, "No": 3.22423420091664e-05}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9972052139064731, "res": {"Yes": 0.9972052139064731, "No": 0.0027946371553424724}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999551060207649, "res": {"Yes": 0.9999551060207649, "No": 4.477529596451165e-05}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999987886094374, "res": {"Yes": 0.999987886094374, "No": 1.2017063017735162e-05}, "ground_truth": 1}, {"key": "35574030", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994453331245771, "res": {"Yes": 0.9994453331245771, "No": 0.0005546213107938565}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999803763825457, "res": {"Yes": 0.9999803763825457, "No": 1.9489045879584175e-05}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00017994325621925792, "res": {"No": 0.9998193593674841, "Yes": 0.00017994325621925792}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.035266072365779486, "res": {"No": 0.9647326043293484, "Yes": 0.035266072365779486}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 8.494734194303175e-05, "res": {"No": 0.9999146986083706, "Yes": 8.494734194303175e-05}, "ground_truth": 1}, {"key": "39105949", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8316557548263335, "res": {"Yes": 0.8316557548263335, "No": 0.16834349059953937}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.24960303078312193, "res": {"No": 0.7503943354694876, "Yes": 0.24960303078312193}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9989798573996179, "res": {"Yes": 0.9989798573996179, "No": 0.001020042485604469}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6755229144172067, "res": {"Yes": 0.6755229144172067, "No": 0.3244769752178248}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999700809486411, "res": {"Yes": 0.999700809486411, "No": 0.00029909658798487764}, "ground_truth": 1}, {"key": "41064322", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997961227857844, "res": {"Yes": 0.9997961227857844, "No": 0.00020378888039975852}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.882902956902916, "res": {"Yes": 0.882902956902916, "No": 0.11709681590440886}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.998333348209196, "res": {"Yes": 0.998333348209196, "No": 0.001666567761628356}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 7.92769684650145e-06}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999896223785124, "res": {"Yes": 0.999896223785124, "No": 0.00010367630131933186}, "ground_truth": 1}, {"key": "28105101", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977765688930166, "res": {"Yes": 0.9977765688930166, "No": 0.0022233682571111213}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997456066634737, "res": {"Yes": 0.9997456066634737, "No": 0.00025428166195308287}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9983698321693741, "res": {"Yes": 0.9983698321693741, "No": 0.001630196857472875}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999908661547138, "res": {"Yes": 0.9999908661547138, "No": 9.068352074827275e-06}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999700059218314, "res": {"Yes": 0.9999700059218314, "No": 2.994579931002949e-05}, "ground_truth": 1}, {"key": "36036068", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1289370857405641e-06}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998976540877003, "res": {"Yes": 0.9998976540877003, "No": 0.00010228853127910447}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.8384695605954838e-05, "res": {"No": 0.9999812107925193, "Yes": 1.8384695605954838e-05}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999614235510903, "res": {"Yes": 0.9999614235510903, "No": 3.844484274028526e-05}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.7550184815757225e-07}, "ground_truth": 1}, {"key": "37991460", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.4327167190528277e-07}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999955150656573, "res": {"Yes": 0.9999955150656573, "No": 4.3549997720584675e-06}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 6.709662495318755e-07, "res": {"No": 0.999999091165773, "Yes": 6.709662495318755e-07}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994333075908317, "res": {"Yes": 0.9994333075908317, "No": 0.0005666246876215713}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999250684975053, "res": {"Yes": 0.9999250684975053, "No": 7.49067163924548e-05}, "ground_truth": 1}, {"key": "38437830", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.00016896702169204064, "res": {"No": 0.9998309200956185, "Yes": 0.00016896702169204064}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9640510725030712, "res": {"Yes": 0.9640510725030712, "No": 0.035948845837129574}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.6374394942340414e-06, "res": {"No": 0.9999970647075079, "Yes": 2.6374394942340414e-06}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0013993287802624343, "res": {"No": 0.9986003855751407, "Yes": 0.0013993287802624343}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998856158223689, "res": {"Yes": 0.9998856158223689, "No": 0.0001142965695842675}, "ground_truth": 1}, {"key": "36507138", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998542766686762, "res": {"Yes": 0.9998542766686762, "No": 0.00014568919598257802}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8922658880908947, "res": {"Yes": 0.8922658880908947, "No": 0.1077337471382176}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9906947185955007, "res": {"Yes": 0.9906947185955007, "No": 0.009305143194703923}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984450470939624, "res": {"Yes": 0.9984450470939624, "No": 0.0015549607108570652}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.6799643248212905e-06}, "ground_truth": 1}, {"key": "37824866", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999995276659155, "res": {"Yes": 0.999995276659155, "No": 4.678102401615872e-06}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999700809486411, "res": {"Yes": 0.999700809486411, "No": 0.0002991273965200522}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00013759582149063408, "res": {"No": 0.9998622621767251, "Yes": 0.00013759582149063408}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999975250738268, "res": {"Yes": 0.999975250738268, "No": 2.4714931075036376e-05}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956944232872067, "res": {"Yes": 0.9956944232872067, "No": 0.004305568905033057}, "ground_truth": 1}, {"key": "25088134", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9601077415849557, "res": {"Yes": 0.9601077415849557, "No": 0.03989198709264691}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997491818801253, "res": {"Yes": 0.9997491818801253, "No": 0.00025077811845534666}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9929543974595635, "res": {"Yes": 0.9929543974595635, "No": 0.007045635020051775}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6962704824031944, "res": {"Yes": 0.6962704824031944, "No": 0.30372934094201914}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.7552292485512983e-07}, "ground_truth": 1}, {"key": "40172531", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999952483661937, "res": {"Yes": 0.999952483661937, "No": 4.7434914859916786e-05}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9496132941955272, "res": {"Yes": 0.9496132941955272, "No": 0.05038664092759982}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.1838401038478795, "res": {"No": 0.8161594652582812, "Yes": 0.1838401038478795}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 3.270623233274064e-06}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999924157887603, "res": {"Yes": 0.9999924157887603, "No": 7.540002834621385e-06}, "ground_truth": 1}, {"key": "37035874", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999819260003368, "res": {"Yes": 0.9999819260003368, "No": 1.8008224279508666e-05}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999853828508316, "res": {"Yes": 0.9999853828508316, "No": 1.4502956975263303e-05}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9907086438983321, "res": {"Yes": 0.9907086438983321, "No": 0.00929115877825634}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999900317366834, "res": {"Yes": 0.9999900317366834, "No": 9.839329396349218e-06}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998813249924359, "res": {"Yes": 0.9998813249924359, "No": 0.00011856893933613269}, "ground_truth": 1}, {"key": "36404465", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999846676395419, "res": {"Yes": 0.9999846676395419, "No": 1.5228665774247123e-05}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.22154264052736886, "res": {"No": 0.77845700284817, "Yes": 0.22154264052736886}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0005109928295759597, "res": {"No": 0.9994889200458753, "Yes": 0.0005109928295759597}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.13093948618841406, "res": {"No": 0.8690602704698178, "Yes": 0.13093948618841406}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996922331199457, "res": {"Yes": 0.9996922331199457, "No": 0.0003076656243336183}, "ground_truth": 1}, {"key": "39602052", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996776985102879, "res": {"Yes": 0.9996776985102879, "No": 0.0003222380499492974}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8668759170788525, "res": {"Yes": 0.8668759170788525, "No": 0.13312394066448316}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.001350230446484425, "res": {"No": 0.9986497247430934, "Yes": 0.001350230446484425}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9870122596511617, "res": {"Yes": 0.9870122596511617, "No": 0.012987680820465069}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999533180476362, "res": {"Yes": 0.9999533180476362, "No": 4.6577871040260344e-05}, "ground_truth": 1}, {"key": "33792789", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999099308804668, "res": {"Yes": 0.9999099308804668, "No": 8.994663571413419e-05}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997049803968442, "res": {"Yes": 0.9997049803968442, "No": 0.00029501185971760263}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8120016880039673, "res": {"Yes": 0.8120016880039673, "No": 0.18799821704506597}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952556750329071, "res": {"Yes": 0.9952556750329071, "No": 0.004744326480441786}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989668921237764, "res": {"Yes": 0.9989668921237764, "No": 0.0010330806689261391}, "ground_truth": 1}, {"key": "32776626", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999716893632173, "res": {"Yes": 0.999716893632173, "No": 0.00028307547493349085}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986412838916974, "res": {"Yes": 0.9986412838916974, "No": 0.0013587293736913903}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.000102786760720757, "res": {"No": 0.9998970581288781, "Yes": 0.000102786760720757}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998458181472437, "res": {"Yes": 0.9998458181472437, "No": 0.0001540810184589858}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.887849661389836e-06}, "ground_truth": 1}, {"key": "37195090", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.3699011143991272e-06}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.9753777600179803e-07}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.42921977293787333, "res": {"No": 0.5707774786053054, "Yes": 0.42921977293787333}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993031772937845, "res": {"Yes": 0.9993031772937845, "No": 0.0006967685279451871}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999214625108432, "res": {"Yes": 0.999214625108432, "No": 0.0007852701965328398}, "ground_truth": 1}, {"key": "33981824", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8266522815445694, "res": {"Yes": 0.8266522815445694, "No": 0.17334712360150328}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999094541095266, "res": {"Yes": 0.9999094541095266, "No": 9.04544634669492e-05}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7564483371668512, "res": {"Yes": 0.7564483371668512, "No": 0.24355085698809828}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9919903545305792, "res": {"Yes": 0.9919903545305792, "No": 0.008009475505566655}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968315965178736, "res": {"Yes": 0.9968315965178736, "No": 0.0031683184159358403}, "ground_truth": 1}, {"key": "39569142", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9945862451764429, "res": {"Yes": 0.9945862451764429, "No": 0.005413659404009147}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9130890477597565, "res": {"Yes": 0.9130890477597565, "No": 0.08691042399431712}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 8.470236793223984e-05, "res": {"No": 0.9999151753838112, "Yes": 8.470236793223984e-05}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982867798399858, "res": {"Yes": 0.9982867798399858, "No": 0.001713203104703357}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975024949681934, "res": {"Yes": 0.9975024949681934, "No": 0.0024974551638877727}, "ground_truth": 1}, {"key": "40268210", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998316352049432, "res": {"Yes": 0.9998316352049432, "No": 0.00016829990857399754}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999462853826817, "res": {"Yes": 0.9999462853826817, "No": 5.368268757262597e-05}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.001078118701336742, "res": {"No": 0.9989218019736359, "Yes": 0.001078118701336742}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0011208854787641965, "res": {"No": 0.998879106171314, "Yes": 0.0011208854787641965}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995526268112412, "res": {"Yes": 0.9995526268112412, "No": 0.00044732372878751483}, "ground_truth": 1}, {"key": "34925159", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998908956621099, "res": {"Yes": 0.998908956621099, "No": 0.001090963094419365}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.41882127557442916, "res": {"No": 0.5811785815972941, "Yes": 0.41882127557442916}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997575130947032, "res": {"Yes": 0.9997575130947032, "No": 0.00024243401201958493}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999110017069941, "res": {"Yes": 0.999110017069941, "No": 0.0008899655534285263}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9894279224379308, "res": {"Yes": 0.9894279224379308, "No": 0.010572002983842695}, "ground_truth": 1}, {"key": "36181903", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.41825220799576207, "res": {"No": 0.5817477176827234, "Yes": 0.41825220799576207}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7025466319340458, "res": {"Yes": 0.7025466319340458, "No": 0.29745310401052444}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9987627979662713, "res": {"Yes": 0.9987627979662713, "No": 0.0012371789890005497}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9942632456799502, "res": {"Yes": 0.9942632456799502, "No": 0.005736727805097522}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998175716220805, "res": {"Yes": 0.9998175716220805, "No": 0.00018233862913793585}, "ground_truth": 1}, {"key": "38620559", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997681196883961, "res": {"Yes": 0.9997681196883961, "No": 0.0002318372363426909}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999508329509569, "res": {"Yes": 0.999508329509569, "No": 0.0004916010718230777}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.011217404877283635, "res": {"No": 0.988782498670474, "Yes": 0.011217404877283635}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9801188785808498, "res": {"Yes": 0.9801188785808498, "No": 0.019881114857002787}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9343072064548814, "res": {"Yes": 0.9343072064548814, "No": 0.06569256669493048}, "ground_truth": 1}, {"key": "32719657", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9895528060166341, "res": {"Yes": 0.9895528060166341, "No": 0.010447124561647907}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.00014834365242139466, "res": {"No": 0.9998515353819624, "Yes": 0.00014834365242139466}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.7379904951144162e-06, "res": {"No": 0.9999981375378344, "Yes": 1.7379904951144162e-06}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999561788061766, "res": {"Yes": 0.9999561788061766, "No": 4.3704914597533755e-05}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996661395438096, "res": {"Yes": 0.9996661395438096, "No": 0.0003338075261855019}, "ground_truth": 1}, {"key": "37530914", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986947806407038, "res": {"Yes": 0.9986947806407038, "No": 0.0013051973807861436}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998686983108214, "res": {"Yes": 0.9998686983108214, "No": 0.00013126006155028257}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.04896762076552969, "res": {"No": 0.951032259833493, "Yes": 0.04896762076552969}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9877948040627957, "res": {"Yes": 0.9877948040627957, "No": 0.012205068044739377}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.90806842549892e-06}, "ground_truth": 1}, {"key": "33306933", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.467374747361011e-07}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989162127221141, "res": {"Yes": 0.9989162127221141, "No": 0.0010836871766920506}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.2266908463579454e-06}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.00674882407760051, "res": {"No": 0.9932511388486165, "Yes": 0.00674882407760051}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9918741080086653, "res": {"Yes": 0.9918741080086653, "No": 0.008125814578306847}, "ground_truth": 1}, {"key": "33837212", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.8056317995016504e-06}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 4.148607148161827e-07, "res": {"No": 0.9999994487765019, "Yes": 4.148607148161827e-07}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.26257133013914724, "res": {"No": 0.7374284149159304, "Yes": 0.26257133013914724}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999013490263273, "res": {"Yes": 0.9999013490263273, "No": 9.860140963074674e-05}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995567971134283, "res": {"Yes": 0.9995567971134283, "No": 0.00044315077319987263}, "ground_truth": 1}, {"key": "40945179", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993892575391485, "res": {"Yes": 0.9993892575391485, "No": 0.0006106385774999366}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994779626589337, "res": {"Yes": 0.9994779626589337, "No": 0.0005219289972119752}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00018342824543404244, "res": {"No": 0.9998165026676953, "Yes": 0.00018342824543404244}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999711979237877, "res": {"Yes": 0.9999711979237877, "No": 2.8763595946991817e-05}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.2376452198682279e-06}, "ground_truth": 1}, {"key": "34152358", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999816875976741, "res": {"Yes": 0.9999816875976741, "No": 1.8226827695906796e-05}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.505907764487796e-07}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.07723634056531173, "res": {"No": 0.9227635931968358, "Yes": 0.07723634056531173}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996126584061322, "res": {"Yes": 0.9996126584061322, "No": 0.00038729747149834735}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999644035315693, "res": {"Yes": 0.9999644035315693, "No": 3.5457012880327084e-05}, "ground_truth": 1}, {"key": "34136541", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999882437011058, "res": {"Yes": 0.9999882437011058, "No": 1.1678044704671572e-05}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9948641561764086, "res": {"Yes": 0.9948641561764086, "No": 0.005135829038730679}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9966789311894231, "res": {"Yes": 0.9966789311894231, "No": 0.003321014401838406}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9464981981806719, "res": {"Yes": 0.9464981981806719, "No": 0.053501552864325314}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999728667271139, "res": {"Yes": 0.9999728667271139, "No": 2.7059944127155067e-05}, "ground_truth": 1}, {"key": "37469603", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999928926002577, "res": {"Yes": 0.9999928926002577, "No": 6.996419778245584e-06}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7499064301761958, "res": {"Yes": 0.7499064301761958, "No": 0.2500933827162942}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.992825703618687, "res": {"Yes": 0.992825703618687, "No": 0.007174080348332007}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986435456170383, "res": {"Yes": 0.9986435456170383, "No": 0.0013562930489743514}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999533180476362, "res": {"Yes": 0.9999533180476362, "No": 4.658229614819847e-05}, "ground_truth": 1}, {"key": "37353611", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997415548027153, "res": {"Yes": 0.9997415548027153, "No": 0.0002583185615527672}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9698023045490346, "res": {"Yes": 0.9698023045490346, "No": 0.030197483073848216}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9810089059672813, "res": {"Yes": 0.9810089059672813, "No": 0.01899089357948248}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998091134211619, "res": {"Yes": 0.9998091134211619, "No": 0.00019079690886471658}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999902701413353, "res": {"Yes": 0.9999902701413353, "No": 9.58514551546256e-06}, "ground_truth": 1}, {"key": "37211649", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.96275590173913e-06}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "No": 4.727003810687593e-06}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999759659438225, "res": {"Yes": 0.9999759659438225, "No": 2.3982359948707882e-05}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.499517243657998e-07}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.2888480383067068e-07}, "ground_truth": 1}, {"key": "37320976", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.43160926308494e-07}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.458306155149548e-06}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.007368556894699432, "res": {"No": 0.992631154062559, "Yes": 0.007368556894699432}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996305285214034, "res": {"Yes": 0.9996305285214034, "No": 0.0003693907614950873}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995582232321059, "res": {"Yes": 0.9995582232321059, "No": 0.0004417353548009731}, "ground_truth": 1}, {"key": "34492412", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999762043451211, "res": {"Yes": 0.9999762043451211, "No": 2.3730462443274868e-05}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.17444192513178364, "res": {"No": 0.8255568257617192, "Yes": 0.17444192513178364}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9901217357589969, "res": {"Yes": 0.9901217357589969, "No": 0.009878195752919264}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974395151476906, "res": {"Yes": 0.9974395151476906, "No": 0.0025604224873146775}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998045845560857, "res": {"Yes": 0.9998045845560857, "No": 0.00019534682294392012}, "ground_truth": 1}, {"key": "36655016", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9912574800335121, "res": {"Yes": 0.9912574800335121, "No": 0.008742404611336699}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.5914668151568431, "res": {"Yes": 0.5914668151568431, "No": 0.40853284254779487}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.28825666436889247, "res": {"No": 0.7117431478904582, "Yes": 0.28825666436889247}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9940491650776674, "res": {"Yes": 0.9940491650776674, "No": 0.005950809608652559}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999698867214138, "res": {"Yes": 0.9999698867214138, "No": 2.9992071336976487e-05}, "ground_truth": 1}, {"key": "35220773", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996836531082333, "res": {"Yes": 0.9996836531082333, "No": 0.00031632322601924566}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982950880164543, "res": {"Yes": 0.9982950880164543, "No": 0.0017048785541860006}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9991372688167309, "res": {"Yes": 0.9991372688167309, "No": 0.000862614210374257}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999826391131764, "res": {"Yes": 0.999826391131764, "No": 0.00017353625810126065}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974207732357957, "res": {"Yes": 0.9974207732357957, "No": 0.002579019476895419}, "ground_truth": 1}, {"key": "31569808", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9313924169761663, "res": {"Yes": 0.9313924169761663, "No": 0.0686058411442368}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981900836775399, "res": {"Yes": 0.9981900836775399, "No": 0.0018098867585475543}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9995334511277878, "res": {"Yes": 0.9995334511277878, "No": 0.00046643121389806637}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987070280095036, "res": {"Yes": 0.9987070280095036, "No": 0.0012928799882656735}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997159402826034, "res": {"Yes": 0.9997159402826034, "No": 0.000283953518628215}, "ground_truth": 1}, {"key": "37696256", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999419942923962, "res": {"Yes": 0.9999419942923962, "No": 5.792531787275712e-05}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.7192042835951907e-06}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.593580789200612e-05, "res": {"No": 0.9999738203326934, "Yes": 2.593580789200612e-05}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.979349003689521, "res": {"Yes": 0.979349003689521, "No": 0.020650944956845965}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998843422451967, "res": {"Yes": 0.998843422451967, "No": 0.0011564954622415025}, "ground_truth": 1}, {"key": "36874328", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986169133471758, "res": {"Yes": 0.9986169133471758, "No": 0.0013828799971788506}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9237092835859578, "res": {"Yes": 0.9237092835859578, "No": 0.0762900771200741}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999940846288958, "res": {"Yes": 0.9999940846288958, "No": 5.835124241459963e-06}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997310713178026, "res": {"Yes": 0.9997310713178026, "No": 0.00026890107417414013}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999710787232282, "res": {"Yes": 0.9999710787232282, "No": 2.8803345914029393e-05}, "ground_truth": 1}, {"key": "24532377", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999907469518097, "res": {"Yes": 0.9999907469518097, "No": 9.113196774123856e-06}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999807339855921, "res": {"Yes": 0.9999807339855921, "No": 1.919620413074035e-05}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987712400444995, "res": {"Yes": 0.9987712400444995, "No": 0.0012287330267826656}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999772771531678, "res": {"Yes": 0.9999772771531678, "No": 2.2624345431527595e-05}, "ground_truth": 1}, {"key": "39560618", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99995629800496, "res": {"Yes": 0.99995629800496, "No": 4.362365538661573e-05}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.02975986515825872, "res": {"No": 0.9702398597113006, "Yes": 0.02975986515825872}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.487264855181195e-06, "res": {"No": 0.9999924157887603, "Yes": 7.487264855181195e-06}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.620888384265593e-06}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9370137850052613, "res": {"Yes": 0.9370137850052613, "No": 0.06298613853835337}, "ground_truth": 1}, {"key": "34922693", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996776985102879, "res": {"Yes": 0.9996776985102879, "No": 0.00032221227193643555}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999515300767047, "res": {"Yes": 0.9999515300767047, "No": 4.841789893010694e-05}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0045652381391404154, "res": {"No": 0.9954344847946525, "Yes": 0.0045652381391404154}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.36678457220287625, "res": {"No": 0.6332152609214481, "Yes": 0.36678457220287625}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944347371721153, "res": {"Yes": 0.9944347371721153, "No": 0.005564962682652132}, "ground_truth": 1}, {"key": "33629577", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990169750858693, "res": {"Yes": 0.9990169750858693, "No": 0.0009828555467890567}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8647386469143383, "res": {"Yes": 0.8647386469143383, "No": 0.1352610316096825}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9820810307433568, "res": {"Yes": 0.9820810307433568, "No": 0.017919021082860184}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999872900832717, "res": {"Yes": 0.9999872900832717, "No": 1.2670539292684695e-05}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.827254291606015e-07}, "ground_truth": 1}, {"key": "32284359", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.979012862258719e-07}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999599931652947, "res": {"Yes": 0.9999599931652947, "No": 3.9958545649655795e-05}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9995722831769311, "res": {"Yes": 0.9995722831769311, "No": 0.00042762025966053854}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924679034866323, "res": {"Yes": 0.9924679034866323, "No": 0.007532050224953676}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.08969715744463023, "res": {"No": 0.9103016872161184, "Yes": 0.08969715744463023}, "ground_truth": 1}, {"key": "28082962", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999903893441826, "res": {"Yes": 0.9999903893441826, "No": 9.491888179115727e-06}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.1902384891186875e-07}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9478296802991383, "res": {"Yes": 0.9478296802991383, "No": 0.05217026929951364}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994989207821441, "res": {"Yes": 0.9994989207821441, "No": 0.0005009117600682408}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999816741023978, "res": {"Yes": 0.999816741023978, "No": 0.00018314158788282128}, "ground_truth": 1}, {"key": "24796803", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998899066707154, "res": {"Yes": 0.9998899066707154, "No": 0.00011002675439200188}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996607808478312, "res": {"Yes": 0.9996607808478312, "No": 0.0003391335077305647}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0006219678218023819, "res": {"No": 0.9993779438272142, "Yes": 0.0006219678218023819}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9822797454788017, "res": {"Yes": 0.9822797454788017, "No": 0.017720268173057413}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993634099747712, "res": {"Yes": 0.993634099747712, "No": 0.006365719584654328}, "ground_truth": 1}, {"key": "35466150", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.25255404842614065, "res": {"No": 0.7474434714912259, "Yes": 0.25255404842614065}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6224359546702862, "res": {"Yes": 0.6224359546702862, "No": 0.3775639351491946}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992039125458926, "res": {"Yes": 0.9992039125458926, "No": 0.0007959803964488312}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9922725464771435, "res": {"Yes": 0.9922725464771435, "No": 0.0077274040556310946}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963974036780355, "res": {"Yes": 0.9963974036780355, "No": 0.0036026287199509488}, "ground_truth": 1}, {"key": "35754289", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998337805059911, "res": {"Yes": 0.9998337805059911, "No": 0.00016612533904998577}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.017407537097990897, "res": {"No": 0.9825924755212915, "Yes": 0.017407537097990897}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9954962646698935, "res": {"Yes": 0.9954962646698935, "No": 0.004503683326537025}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.990715779007679, "res": {"Yes": 0.990715779007679, "No": 0.009284174453290415}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.782246633542072, "res": {"Yes": 0.782246633542072, "No": 0.2177530187435088}, "ground_truth": 1}, {"key": "36678662", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.11090259338532552, "res": {"No": 0.8890972561127534, "Yes": 0.11090259338532552}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.2425362003383351, "res": {"No": 0.7574635966357, "Yes": 0.2425362003383351}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997998173492212, "res": {"Yes": 0.9997998173492212, "No": 0.0002000974620226503}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985362065922052, "res": {"Yes": 0.9985362065922052, "No": 0.0014637238621262453}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991164373715391, "res": {"Yes": 0.9991164373715391, "No": 0.0008835338088385599}, "ground_truth": 1}, {"key": "35399671", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956205658125579, "res": {"Yes": 0.9956205658125579, "No": 0.004379473794731973}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993511583888156, "res": {"Yes": 0.9993511583888156, "No": 0.0006487455388756976}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8603169337568203, "res": {"Yes": 0.8603169337568203, "No": 0.13968258227278224}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9571712259934385, "res": {"Yes": 0.9571712259934385, "No": 0.042828560394603164}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993591848343223, "res": {"Yes": 0.993591848343223, "No": 0.006408013259550351}, "ground_truth": 1}, {"key": "36888180", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9443910412476163, "res": {"Yes": 0.9443910412476163, "No": 0.05560836163022908}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.2651867458130184, "res": {"No": 0.7348118639129041, "Yes": 0.2651867458130184}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.850019175789406, "res": {"Yes": 0.850019175789406, "No": 0.14998064898119598}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8541728202793931, "res": {"Yes": 0.8541728202793931, "No": 0.1458271299854413}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999637905422071, "res": {"Yes": 0.999637905422071, "No": 0.0003620207598973823}, "ground_truth": 1}, {"key": "28061069", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998702477485516, "res": {"Yes": 0.9998702477485516, "No": 0.0001296410604194687}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996954469456997, "res": {"Yes": 0.9996954469456997, "No": 0.00030443985830302684}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9773053601507784, "res": {"Yes": 0.9773053601507784, "No": 0.0226945924712168}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992109326774816, "res": {"Yes": 0.9992109326774816, "No": 0.0007889575822266787}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999914621674475, "res": {"Yes": 0.9999914621674475, "No": 8.329351133146521e-06}, "ground_truth": 1}, {"key": "22259982", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999691715222073, "res": {"Yes": 0.9999691715222073, "No": 3.079691608833185e-05}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998288939803051, "res": {"Yes": 0.9998288939803051, "No": 0.0001710282175778429}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996405269659191, "res": {"Yes": 0.9996405269659191, "No": 0.00035944711453906935}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9948528297124665, "res": {"Yes": 0.9948528297124665, "No": 0.005147106844138339}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8343264219269052, "res": {"Yes": 0.8343264219269052, "No": 0.16567318437892367}, "ground_truth": 1}, {"key": "34026805", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9786720218629542, "res": {"Yes": 0.9786720218629542, "No": 0.021327840372157843}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.000571033660476123, "res": {"No": 0.9994288995399836, "Yes": 0.000571033660476123}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996790093285327, "res": {"Yes": 0.9996790093285327, "No": 0.0003209526350765893}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992949652838554, "res": {"Yes": 0.9992949652838554, "No": 0.0007049196734586486}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999408023272514, "res": {"Yes": 0.9999408023272514, "No": 5.915952199632944e-05}, "ground_truth": 1}, {"key": "36713809", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999005146829923, "res": {"Yes": 0.9999005146829923, "No": 9.940342382255876e-05}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992746070547645, "res": {"Yes": 0.9992746070547645, "No": 0.0007253306051106824}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997745552364907, "res": {"Yes": 0.9997745552364907, "No": 0.00022535221043014443}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999658339276736, "res": {"Yes": 0.9999658339276736, "No": 3.410989635807263e-05}, "ground_truth": 1}, {"key": "39726411", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999962302846054, "res": {"Yes": 0.9999962302846054, "No": 3.693097578081682e-06}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998756112369238, "res": {"Yes": 0.9998756112369238, "No": 0.00012424899270778436}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.005455988315666475, "res": {"No": 0.9945439140997037, "Yes": 0.005455988315666475}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999899125338788, "res": {"Yes": 0.9999899125338788, "No": 9.944638462643936e-06}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9855283733371542, "res": {"Yes": 0.9855283733371542, "No": 0.014471331856051607}, "ground_truth": 1}, {"key": "37069841", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990764614245015, "res": {"Yes": 0.9990764614245015, "No": 0.0009234892489756989}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 1.0931305576448234e-07, "res": {"No": 0.9999996871837189, "Yes": 1.0931305576448234e-07}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9982419949044355, "res": {"Yes": 0.9982419949044355, "No": 0.001757944588219571}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997473942801988, "res": {"Yes": 0.9997473942801988, "No": 0.00025251598609136533}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.832200914805852e-06}, "ground_truth": 1}, {"key": "38894693", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996150415555235, "res": {"Yes": 0.9996150415555235, "No": 0.00038487998482536565}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6517006814464222, "res": {"Yes": 0.6517006814464222, "No": 0.3482983635546706}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9989590402748622, "res": {"Yes": 0.9989590402748622, "No": 0.001040889544672974}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993808029816449, "res": {"Yes": 0.9993808029816449, "No": 0.0006191902388154176}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990395761025471, "res": {"Yes": 0.9990395761025471, "No": 0.0009603587935939508}, "ground_truth": 1}, {"key": "33946032", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997818250336202, "res": {"Yes": 0.9997818250336202, "No": 0.0002181121888060154}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.971129999084248, "res": {"Yes": 0.971129999084248, "No": 0.028869941835880146}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6027337934281791, "res": {"Yes": 0.6027337934281791, "No": 0.39726577652708905}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8817401331545298, "res": {"Yes": 0.8817401331545298, "No": 0.11825972665290463}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997198728829297, "res": {"Yes": 0.9997198728829297, "No": 0.00028002602855232734}, "ground_truth": 1}, {"key": "39035311", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9944631491750892, "res": {"Yes": 0.9944631491750892, "No": 0.005536830798184737}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9646862249595389, "res": {"Yes": 0.9646862249595389, "No": 0.035313251107528316}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9990482586933288, "res": {"Yes": 0.9990482586933288, "No": 0.0009516801086176605}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9964880674117291, "res": {"Yes": 0.9964880674117291, "No": 0.003511917831432189}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995989627169574, "res": {"Yes": 0.9995989627169574, "No": 0.00040092206546802684}, "ground_truth": 1}, {"key": "27680038", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988948088740633, "res": {"Yes": 0.9988948088740633, "No": 0.0011051513334955966}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951279296258368, "res": {"Yes": 0.9951279296258368, "No": 0.004872003647056529}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.9479960987114456e-07, "res": {"No": 0.9999995679800934, "Yes": 2.9479960987114456e-07}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.3663886220578093, "res": {"No": 0.6336111758407073, "Yes": 0.3663886220578093}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9804282901719064, "res": {"Yes": 0.9804282901719064, "No": 0.01957167325905493}, "ground_truth": 1}, {"key": "36901907", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5889167538730309, "res": {"Yes": 0.5889167538730309, "No": 0.4110830980031554}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9944583146042411, "res": {"Yes": 0.9944583146042411, "No": 0.005541644395353407}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9987077423848962, "res": {"Yes": 0.9987077423848962, "No": 0.0012921851227805604}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9921290246337632, "res": {"Yes": 0.9921290246337632, "No": 0.007870924909615768}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999017066011127, "res": {"Yes": 0.9999017066011127, "No": 9.815663881979622e-05}, "ground_truth": 1}, {"key": "21530542", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999273332003598, "res": {"Yes": 0.9999273332003598, "No": 7.253226939685976e-05}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993869940049946, "res": {"Yes": 0.9993869940049946, "No": 0.0006128728904835149}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9475346166215524, "res": {"Yes": 0.9475346166215524, "No": 0.05246531470554462}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.1931728876743129, "res": {"No": 0.8068268450240018, "Yes": 0.1931728876743129}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6361975058890936, "res": {"Yes": 0.6361975058890936, "No": 0.36380215111869396}, "ground_truth": 1}, {"key": "38192532", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.37927486473082944, "res": {"No": 0.6207245340015193, "Yes": 0.37927486473082944}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7938701659979046, "res": {"Yes": 0.7938701659979046, "No": 0.20612949318282311}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9952783406465825, "res": {"Yes": 0.9952783406465825, "No": 0.004720946357555078}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984698820260144, "res": {"Yes": 0.9984698820260144, "No": 0.0015296664100460607}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3166040941505241, "res": {"No": 0.6833947920341105, "Yes": 0.3166040941505241}, "ground_truth": 1}, {"key": "34102400", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.989543238076809, "res": {"Yes": 0.989543238076809, "No": 0.01045639422644687}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8961690875750306, "res": {"Yes": 0.8961690875750306, "No": 0.10383016951992957}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3329878420595144, "res": {"No": 0.6670120281638164, "Yes": 0.3329878420595144}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998834704001013, "res": {"Yes": 0.9998834704001013, "No": 0.00011640273807063468}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.7891015347812293e-06}, "ground_truth": 1}, {"key": "36133399", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998642883696566, "res": {"Yes": 0.9998642883696566, "No": 0.000135621586859381}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.008086615430795363, "res": {"No": 0.9919131638205743, "Yes": 0.008086615430795363}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8396310414553533, "res": {"Yes": 0.8396310414553533, "No": 0.16036869033755558}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999713080199361, "res": {"Yes": 0.999713080199361, "No": 0.00028683426008782034}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999870516788303, "res": {"Yes": 0.9999870516788303, "No": 1.2885537494295814e-05}, "ground_truth": 1}, {"key": "34314544", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997263044414649, "res": {"Yes": 0.9997263044414649, "No": 0.0002736318412909491}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996617341547962, "res": {"Yes": 0.9996617341547962, "No": 0.0003381339970726162}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.932914249892776e-05, "res": {"No": 0.999920539107184, "Yes": 7.932914249892776e-05}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999983475621529, "res": {"Yes": 0.999983475621529, "No": 1.6470115517274823e-05}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981605147306538, "res": {"Yes": 0.9981605147306538, "No": 0.0018392249848102474}, "ground_truth": 1}, {"key": "33460074", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996830572769599, "res": {"Yes": 0.9996830572769599, "No": 0.00031682182764593633}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998250801202664, "res": {"Yes": 0.9998250801202664, "No": 0.00017481697298502873}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.010858359878074587, "res": {"No": 0.9891410220287181, "Yes": 0.010858359878074587}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995913367555803, "res": {"Yes": 0.9995913367555803, "No": 0.0004086299823622685}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0009130393304852967, "res": {"No": 0.9990869271449367, "Yes": 0.0009130393304852967}, "ground_truth": 1}, {"key": "36191495", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2336153545719633, "res": {"No": 0.7663840071811197, "Yes": 0.2336153545719633}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994609329188965, "res": {"Yes": 0.9994609329188965, "No": 0.0005389827452989828}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8917754908090141, "res": {"Yes": 0.8917754908090141, "No": 0.1082242733423537}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.993923020191394, "res": {"Yes": 0.993923020191394, "No": 0.006076952148850732}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984045249257605, "res": {"Yes": 0.9984045249257605, "No": 0.0015954006229834724}, "ground_truth": 1}, {"key": "39532668", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966420995004167, "res": {"Yes": 0.9966420995004167, "No": 0.003357831165101871}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999852636485811, "res": {"Yes": 0.9999852636485811, "No": 1.4694518865559092e-05}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.990000662041113e-07}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.719028381292388e-06}, "ground_truth": 1}, {"key": "20328247", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999772771531678, "res": {"Yes": 0.9999772771531678, "No": 2.2661751739283543e-05}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999866940725246, "res": {"Yes": 0.9999866940725246, "No": 1.3170843462901961e-05}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9960994965024733, "res": {"Yes": 0.9960994965024733, "No": 0.003900494262118723}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999940846288958, "res": {"Yes": 0.9999940846288958, "No": 5.865768810297205e-06}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999847868417213, "res": {"Yes": 0.9999847868417213, "No": 1.5148364282343836e-05}, "ground_truth": 1}, {"key": "39112675", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 5.217000176981604e-06}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.5713599400849074, "res": {"Yes": 0.5713599400849074, "No": 0.4286378881755801}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990395761025471, "res": {"Yes": 0.9990395761025471, "No": 0.0009603222046209398}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 5.0764213666653715e-06}, "ground_truth": 1}, {"key": "31620300", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978247457021435, "res": {"Yes": 0.9978247457021435, "No": 0.002175176866101026}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9246020687282542, "res": {"Yes": 0.9246020687282542, "No": 0.07539760261731811}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.05923190182843443, "res": {"No": 0.9407678015183806, "Yes": 0.05923190182843443}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999910407653634, "res": {"Yes": 0.999910407653634, "No": 8.951028706631885e-05}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994612903561739, "res": {"Yes": 0.9994612903561739, "No": 0.0005386212091858178}, "ground_truth": 1}, {"key": "37518509", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999559404106522, "res": {"Yes": 0.9999559404106522, "No": 4.380607725905754e-05}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.5587566117582413, "res": {"Yes": 0.5587566117582413, "No": 0.44124295361791865}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.841742891029094e-06, "res": {"No": 0.9999958726752174, "Yes": 2.841742891029094e-06}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997957652387589, "res": {"Yes": 0.9997957652387589, "No": 0.00020416695336045805}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994053219910994, "res": {"Yes": 0.9994053219910994, "No": 0.0005946350511572816}, "ground_truth": 1}, {"key": "35454095", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991889159466383, "res": {"Yes": 0.9991889159466383, "No": 0.0008106525640642234}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999882437011058, "res": {"Yes": 0.9999882437011058, "No": 1.1691821239049552e-05}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999456893978742, "res": {"Yes": 0.9999456893978742, "No": 5.4233812346342985e-05}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.973246810712559e-06}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 5.0906959900547525e-06}, "ground_truth": 1}, {"key": "38542788", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9708536179197257, "res": {"Yes": 0.9708536179197257, "No": 0.029146291370247844}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999504572972803, "res": {"Yes": 0.9999504572972803, "No": 4.942159912876801e-05}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.001631966254571329, "res": {"No": 0.9983678090747959, "Yes": 0.001631966254571329}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999962302846054, "res": {"Yes": 0.9999962302846054, "No": 3.7206878531932126e-06}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999790651681366, "res": {"Yes": 0.9999790651681366, "No": 2.0824841104010445e-05}, "ground_truth": 1}, {"key": "23944937", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.877988370453151e-06}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996173055462103, "res": {"Yes": 0.9996173055462103, "No": 0.00038263512465879136}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.024838132646607683, "res": {"No": 0.9751617327472925, "Yes": 0.024838132646607683}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9768053130109844, "res": {"Yes": 0.9768053130109844, "No": 0.023194421078080434}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5045534239240429, "res": {"Yes": 0.5045534239240429, "No": 0.49544609016047025}, "ground_truth": 1}, {"key": "31753944", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997386983052361, "res": {"Yes": 0.9997386983052361, "No": 0.00026124915216289824}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998619046160455, "res": {"Yes": 0.9998619046160455, "No": 0.00013804104354969102}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9734438006591137, "res": {"Yes": 0.9734438006591137, "No": 0.026556078821650425}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9712079072287975, "res": {"Yes": 0.9712079072287975, "No": 0.028791971444264253}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996110948126551, "res": {"Yes": 0.996110948126551, "No": 0.003889055316784767}, "ground_truth": 1}, {"key": "35527214", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9826603838511936, "res": {"Yes": 0.9826603838511936, "No": 0.017339682813200766}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8152347014169803, "res": {"Yes": 0.8152347014169803, "No": 0.18476520908966043}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.018294123333953, "res": {"No": 0.9817057834902064, "Yes": 0.018294123333953}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 3.717500243935131e-05, "res": {"No": 0.999962734742367, "Yes": 3.717500243935131e-05}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993834201035557, "res": {"Yes": 0.9993834201035557, "No": 0.0006165428700398374}, "ground_truth": 1}, {"key": "40400404", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9974206543432466, "res": {"Yes": 0.9974206543432466, "No": 0.00257938314427961}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9895169710001855, "res": {"Yes": 0.9895169710001855, "No": 0.01048291279764986}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.7882304544590627e-06, "res": {"No": 0.9999974223173222, "Yes": 1.7882304544590627e-06}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999970244320709, "res": {"Yes": 0.999970244320709, "No": 2.962165133510112e-05}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.8575974135553105e-06}, "ground_truth": 1}, {"key": "21713119", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999903893441826, "res": {"Yes": 0.9999903893441826, "No": 9.581015207890625e-06}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999261412516917, "res": {"Yes": 0.9999261412516917, "No": 7.369379168277588e-05}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8630803817498215, "res": {"Yes": 0.8630803817498215, "No": 0.13691958449226577}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999781115595554, "res": {"Yes": 0.9999781115595554, "No": 2.1850560163015693e-05}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995669213056622, "res": {"Yes": 0.9995669213056622, "No": 0.00043303500078798986}, "ground_truth": 1}, {"key": "28730678", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8974884210858757, "res": {"Yes": 0.8974884210858757, "No": 0.1025116016479038}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 7.976203592428946e-06}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.43500281789277906, "res": {"No": 0.5649964675382111, "Yes": 0.43500281789277906}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7695737721946699, "res": {"Yes": 0.7695737721946699, "No": 0.23042627865547033}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945482709077711, "res": {"Yes": 0.9945482709077711, "No": 0.005451666157807578}, "ground_truth": 1}, {"key": "36823733", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9934429985696017, "res": {"Yes": 0.9934429985696017, "No": 0.006556965232915365}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.023641429942787468, "res": {"No": 0.9763583803366548, "Yes": 0.023641429942787468}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999859282501636, "res": {"Yes": 0.999859282501636, "No": 0.00014065703591492317}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998412891059067, "res": {"Yes": 0.9998412891059067, "No": 0.00015869808457110346}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.3476939722291734e-06}, "ground_truth": 1}, {"key": "35988862", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999208966888319, "res": {"Yes": 0.9999208966888319, "No": 7.897438488792748e-05}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990667031926357, "res": {"Yes": 0.9990667031926357, "No": 0.000933261796512816}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9845426611827495, "res": {"Yes": 0.9845426611827495, "No": 0.015457196364068674}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7517748285425778, "res": {"Yes": 0.7517748285425778, "No": 0.24822509583037744}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987088101039144, "res": {"Yes": 0.9987088101039144, "No": 0.0012911370690248457}, "ground_truth": 1}, {"key": "40499665", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9897613356676027, "res": {"Yes": 0.9897613356676027, "No": 0.010238388958701192}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.963191306607877, "res": {"Yes": 0.963191306607877, "No": 0.036808525111420756}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00015273701067895938, "res": {"No": 0.9998470100041632, "Yes": 0.00015273701067895938}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979233959318538, "res": {"Yes": 0.9979233959318538, "No": 0.0020766214681988827}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999284059529762, "res": {"Yes": 0.9999284059529762, "No": 7.150458388782924e-05}, "ground_truth": 1}, {"key": "32829820", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998360449920916, "res": {"Yes": 0.9998360449920916, "No": 0.00016390738750243934}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969198480314451, "res": {"Yes": 0.9969198480314451, "No": 0.0030800852617297973}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9975495845814287, "res": {"Yes": 0.9975495845814287, "No": 0.002450349283237823}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983335863117279, "res": {"Yes": 0.9983335863117279, "No": 0.0016663327921397045}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999054015547151, "res": {"Yes": 0.9999054015547151, "No": 9.448098128147771e-05}, "ground_truth": 1}, {"key": "20583553", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998986076205548, "res": {"Yes": 0.9998986076205548, "No": 0.00010132980144446975}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999186320055549, "res": {"Yes": 0.9999186320055549, "No": 8.127720191378337e-05}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9678531284742813, "res": {"Yes": 0.9678531284742813, "No": 0.03214659640497243}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986165562418314, "res": {"Yes": 0.9986165562418314, "No": 0.0013834548415500635}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999428286693428, "res": {"Yes": 0.9999428286693428, "No": 5.703143220072921e-05}, "ground_truth": 1}, {"key": "30501550", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994061559951887, "res": {"Yes": 0.9994061559951887, "No": 0.0005937946532319065}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9934410000623317, "res": {"Yes": 0.9934410000623317, "No": 0.006558876211000222}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9697951299781243, "res": {"Yes": 0.9697951299781243, "No": 0.030204519907163673}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999026122726584, "res": {"Yes": 0.999026122726584, "No": 0.0009737516171852021}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9728343081190488, "res": {"Yes": 0.9728343081190488, "No": 0.02716539386761038}, "ground_truth": 1}, {"key": "38755897", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999247109143655, "res": {"Yes": 0.9999247109143655, "No": 7.516235881478636e-05}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999934246531854, "res": {"Yes": 0.999934246531854, "No": 6.572843010297211e-05}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9968808722180702, "res": {"Yes": 0.9968808722180702, "No": 0.0031191492207602654}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9969388034462582, "res": {"Yes": 0.9969388034462582, "No": 0.0030612039276093875}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989321542534807, "res": {"Yes": 0.9989321542534807, "No": 0.0010678190303924635}, "ground_truth": 1}, {"key": "35507201", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997476326300034, "res": {"Yes": 0.9997476326300034, "No": 0.0002522994202953113}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994778435111729, "res": {"Yes": 0.9994778435111729, "No": 0.0005220902981918314}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9980011576514032, "res": {"Yes": 0.9980011576514032, "No": 0.0019988537254562824}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987958522683993, "res": {"Yes": 0.9987958522683993, "No": 0.001204095880860538}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998983426161635, "res": {"Yes": 0.998983426161635, "No": 0.0010165385513876783}, "ground_truth": 1}, {"key": "36453511", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995398851253298, "res": {"Yes": 0.9995398851253298, "No": 0.0004600286903960002}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995707341609555, "res": {"Yes": 0.9995707341609555, "No": 0.0004291524675337833}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0004543492874993015, "res": {"No": 0.9995454778265137, "Yes": 0.0004543492874993015}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8435598880921343, "res": {"Yes": 0.8435598880921343, "No": 0.15643923799745002}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8877812201943084, "res": {"Yes": 0.8877812201943084, "No": 0.11221828850759921}, "ground_truth": 1}, {"key": "38066835", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.27323244477759057, "res": {"No": 0.7267669404079213, "Yes": 0.27323244477759057}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6755635956320121, "res": {"Yes": 0.6755635956320121, "No": 0.32443583414576627}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.998077144859206, "res": {"Yes": 0.998077144859206, "No": 0.0019227919664516065}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999893165220688, "res": {"Yes": 0.9999893165220688, "No": 1.05905760942058e-05}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 7.947107754153042e-06}, "ground_truth": 1}, {"key": "39697181", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.891592893342229e-06}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999951574563252, "res": {"Yes": 0.9999951574563252, "No": 4.7233229086165695e-06}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9685336483423355, "res": {"Yes": 0.9685336483423355, "No": 0.031466222816692355}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.14966968695670402, "res": {"No": 0.8503302292043387, "Yes": 0.14966968695670402}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9893415728947482, "res": {"Yes": 0.9893415728947482, "No": 0.010658303705183267}, "ground_truth": 1}, {"key": "21820893", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9853584306665699, "res": {"Yes": 0.9853584306665699, "No": 0.01464145485565657}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9612538095972939, "res": {"Yes": 0.9612538095972939, "No": 0.038745959484002494}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.710805033088745e-07, "res": {"No": 0.9999996871837189, "Yes": 2.710805033088745e-07}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999928926002577, "res": {"Yes": 0.9999928926002577, "No": 6.925148929895723e-06}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.728085538316692, "res": {"Yes": 0.728085538316692, "No": 0.2719027808289755}, "ground_truth": 1}, {"key": "40519933", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9730729814305444, "res": {"Yes": 0.9730729814305444, "No": 0.02692668167869088}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981772551208394, "res": {"Yes": 0.9981772551208394, "No": 0.0018225772648512374}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 6.67359346327663e-05, "res": {"No": 0.999933054575945, "Yes": 6.67359346327663e-05}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999180360292298, "res": {"Yes": 0.9999180360292298, "No": 8.13620996214737e-05}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999661915245194, "res": {"Yes": 0.9999661915245194, "No": 3.3756552882863434e-05}, "ground_truth": 1}, {"key": "30446033", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998943167248708, "res": {"Yes": 0.9998943167248708, "No": 0.00010554881612300197}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992461640771738, "res": {"Yes": 0.9992461640771738, "No": 0.0007537227222229654}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996566101717876, "res": {"Yes": 0.9996566101717876, "No": 0.0003433353127960272}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.71261077831453e-06}, "ground_truth": 1}, {"key": "40216291", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994329501735562, "res": {"Yes": 0.9994329501735562, "No": 0.0005670007111525538}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9663977133085586, "res": {"Yes": 0.9663977133085586, "No": 0.03360225839953102}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7998284274522107, "res": {"Yes": 0.7998284274522107, "No": 0.2001713758691245}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999901509395023, "res": {"Yes": 0.9999901509395023, "No": 9.790245746004232e-06}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999228038037797, "res": {"Yes": 0.9999228038037797, "No": 7.707623969907273e-05}, "ground_truth": 1}, {"key": "33479118", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999705695416093, "res": {"Yes": 0.999705695416093, "No": 0.0002942284651672361}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984494434568728, "res": {"Yes": 0.9984494434568728, "No": 0.001550567478334854}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.5245658557100077, "res": {"Yes": 0.5245658557100077, "No": 0.4754321971064901}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9722806739468839, "res": {"Yes": 0.9722806739468839, "No": 0.027719235067741578}, "ground_truth": 1}, {"key": "22297373", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991471358955074, "res": {"Yes": 0.9991471358955074, "No": 0.0008527194180899094}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.10456259413087443, "res": {"No": 0.895436885128841, "Yes": 0.10456259413087443}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9832362618027195, "res": {"Yes": 0.9832362618027195, "No": 0.01676314587784276}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980084002718845, "res": {"Yes": 0.9980084002718845, "No": 0.0019915309787056803}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993948456797808, "res": {"Yes": 0.9993948456797808, "No": 0.0006048969772323681}, "ground_truth": 1}, {"key": "36463668", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998215046521906, "res": {"Yes": 0.9998215046521906, "No": 0.00017842485959869737}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992404503339962, "res": {"Yes": 0.9992404503339962, "No": 0.0007595115717758084}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997575130947032, "res": {"Yes": 0.9997575130947032, "No": 0.0002424542561047675}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.612161189337068e-06}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.786292195644499e-07}, "ground_truth": 1}, {"key": "35264615", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999760851449647, "res": {"Yes": 0.9999760851449647, "No": 2.381933290730552e-05}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999943901441583, "res": {"Yes": 0.999943901441583, "No": 5.601763534108076e-05}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9804808856778084, "res": {"Yes": 0.9804808856778084, "No": 0.019519115724935025}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995335702821776, "res": {"Yes": 0.9995335702821776, "No": 0.0004664189002765598}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999936078174301, "res": {"Yes": 0.9999936078174301, "No": 6.311652688099074e-06}, "ground_truth": 1}, {"key": "39898482", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.3924921373470994e-06}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999820452021894, "res": {"Yes": 0.9999820452021894, "No": 1.789004759540013e-05}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999960112362533, "res": {"Yes": 0.999960112362533, "No": 3.981125088775298e-05}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999379416057018, "res": {"Yes": 0.9999379416057018, "No": 6.203889691726088e-05}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999897933310884, "res": {"Yes": 0.9999897933310884, "No": 1.0162653283061621e-05}, "ground_truth": 1}, {"key": "37228721", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.6054980776703e-07}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997633526753965, "res": {"Yes": 0.9997633526753965, "No": 0.00023654927991619356}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5674900062010813, "res": {"Yes": 0.5674900062010813, "No": 0.43250826628290767}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991270335171394, "res": {"Yes": 0.9991270335171394, "No": 0.0008728432251334766}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998442687373722, "res": {"Yes": 0.9998442687373722, "No": 0.00015547245193711266}, "ground_truth": 1}, {"key": "24535799", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984685802218308, "res": {"Yes": 0.9984685802218308, "No": 0.0015313573193822061}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999859520878116, "res": {"Yes": 0.999859520878116, "No": 0.00014039284910081994}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9823670583119982, "res": {"Yes": 0.9823670583119982, "No": 0.01763275547706159}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998400972658052, "res": {"Yes": 0.9998400972658052, "No": 0.00015985954052567713}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999502188991202, "res": {"Yes": 0.9999502188991202, "No": 4.9706144535426855e-05}, "ground_truth": 1}, {"key": "35177759", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999430670627412, "res": {"Yes": 0.9999430670627412, "No": 5.689301406662645e-05}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998472483677188, "res": {"Yes": 0.9998472483677188, "No": 0.00015264501434670316}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.34966878539221663, "res": {"No": 0.6503308926545603, "Yes": 0.34966878539221663}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999906277489198, "res": {"Yes": 0.9999906277489198, "No": 9.333719148241064e-06}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998728699017159, "res": {"Yes": 0.9998728699017159, "No": 0.00012709178703398754}, "ground_truth": 1}, {"key": "34364829", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999355576904948, "res": {"Yes": 0.9999355576904948, "No": 6.441783643015688e-05}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999497421129699, "res": {"Yes": 0.9999497421129699, "No": 5.016650424950954e-05}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999671451237333, "res": {"Yes": 0.9999671451237333, "No": 3.2728247992030654e-05}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998048229195251, "res": {"Yes": 0.9998048229195251, "No": 0.0001950384186672059}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999371072378327, "res": {"Yes": 0.9999371072378327, "No": 6.275050074400984e-05}, "ground_truth": 1}, {"key": "38090732", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 7.170250261406636e-06}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999872900832717, "res": {"Yes": 0.9999872900832717, "No": 1.2585326325440181e-05}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00025576078089086944, "res": {"No": 0.9997432232128037, "Yes": 0.00025576078089086944}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.2907323225820256e-06}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.771169845568233e-06}, "ground_truth": 1}, {"key": "30651479", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 3.3727268044216133e-06}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999909853566321, "res": {"Yes": 0.9999909853566321, "No": 8.905085855501567e-06}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.884856074567281e-05, "res": {"No": 0.9999810915907662, "Yes": 1.884856074567281e-05}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997682388807585, "res": {"Yes": 0.9997682388807585, "No": 0.0002316567054614877}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999874092852638, "res": {"Yes": 0.9999874092852638, "No": 1.2478479966462641e-05}, "ground_truth": 1}, {"key": "39380921", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995702575457518, "res": {"Yes": 0.9995702575457518, "No": 0.00042965349487541035}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999037328639782, "res": {"Yes": 0.9999037328639782, "No": 9.616510499848027e-05}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9843824143090592, "res": {"Yes": 0.9843824143090592, "No": 0.015617543707142769}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998849006744357, "res": {"Yes": 0.9998849006744357, "No": 0.00011497613188136091}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999599931652947, "res": {"Yes": 0.9999599931652947, "No": 3.9855465911036555e-05}, "ground_truth": 1}, {"key": "39037490", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999690523188893, "res": {"Yes": 0.9999690523188893, "No": 3.083441875095284e-05}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999906277489198, "res": {"Yes": 0.9999906277489198, "No": 9.285634773634248e-06}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7854757058712103, "res": {"Yes": 0.7854757058712103, "No": 0.21452422712980473}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946509931576761, "res": {"Yes": 0.9946509931576761, "No": 0.005349016322140149}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999810662776555, "res": {"Yes": 0.999810662776555, "No": 0.00018921492872369741}, "ground_truth": 1}, {"key": "35917499", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999344849262062, "res": {"Yes": 0.9999344849262062, "No": 6.543796260114722e-05}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999870009379513, "res": {"Yes": 0.999870009379513, "No": 0.000129958030752272}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.513019460414333, "res": {"Yes": 0.513019460414333, "No": 0.48698049891353384}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999924157887603, "res": {"Yes": 0.9999924157887603, "No": 7.490207928545324e-06}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999893165220688, "res": {"Yes": 0.9999893165220688, "No": 1.059227072194747e-05}, "ground_truth": 1}, {"key": "34908073", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999031369065324, "res": {"Yes": 0.9999031369065324, "No": 9.679918336783737e-05}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999392527721875, "res": {"Yes": 0.9999392527721875, "No": 6.073028835005194e-05}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.15898873950390008, "res": {"No": 0.8410111340804299, "Yes": 0.15898873950390008}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999338889494318, "res": {"Yes": 0.9999338889494318, "No": 6.597966298583084e-05}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999844292352256, "res": {"Yes": 0.9999844292352256, "No": 1.5437421941653056e-05}, "ground_truth": 1}, {"key": "36344759", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999228038037797, "res": {"Yes": 0.9999228038037797, "No": 7.706009391818547e-05}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992837717639798, "res": {"Yes": 0.9992837717639798, "No": 0.0007161134169404722}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7939132187443714, "res": {"Yes": 0.7939132187443714, "No": 0.2060864166204055}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0007640003581615675, "res": {"No": 0.9992358122802784, "Yes": 0.0007640003581615675}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.5451626509600407e-06}, "ground_truth": 1}, {"key": "39984637", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.9744839386905928e-06}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999193471666147, "res": {"Yes": 0.9999193471666147, "No": 8.051774202481501e-05}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.30290625826608625, "res": {"No": 0.6970934378729148, "Yes": 0.30290625826608625}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.942936198164266, "res": {"Yes": 0.942936198164266, "No": 0.05706359108120636}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993979394712174, "res": {"Yes": 0.9993979394712174, "No": 0.0006019775534370717}, "ground_truth": 1}, {"key": "17917326", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975988097804213, "res": {"Yes": 0.9975988097804213, "No": 0.0024011398905200852}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9848215842083234, "res": {"Yes": 0.9848215842083234, "No": 0.015178155848510443}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.6077214160066105e-05, "res": {"No": 0.9999534372470786, "Yes": 4.6077214160066105e-05}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999241149394169, "res": {"Yes": 0.9999241149394169, "No": 7.583618534526707e-05}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9824081270253038, "res": {"Yes": 0.9824081270253038, "No": 0.017591912492854123}, "ground_truth": 1}, {"key": "32193638", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998652418606938, "res": {"Yes": 0.9998652418606938, "No": 0.00013466706644831944}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9019001892280869, "res": {"Yes": 0.9019001892280869, "No": 0.0980995401485792}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9923873530982261, "res": {"Yes": 0.9923873530982261, "No": 0.0076125772364066665}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8885470396678942, "res": {"Yes": 0.8885470396678942, "No": 0.1114525432233134}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9702230261957657, "res": {"Yes": 0.9702230261957657, "No": 0.029776722942728994}, "ground_truth": 1}, {"key": "34564692", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999045999598008, "res": {"Yes": 0.999045999598008, "No": 0.0009539058319841548}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.018188095360286347, "res": {"No": 0.981811603332632, "Yes": 0.018188095360286347}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9944749415890292, "res": {"Yes": 0.9944749415890292, "No": 0.0055239648864470435}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999901509395023, "res": {"Yes": 0.9999901509395023, "No": 9.824326030170281e-06}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998997995354019, "res": {"Yes": 0.9998997995354019, "No": 0.00010013753168581706}, "ground_truth": 1}, {"key": "39329284", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99988049066181, "res": {"Yes": 0.99988049066181, "No": 0.00011914325087024224}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.885143960474728, "res": {"Yes": 0.885143960474728, "No": 0.11485455387521099}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9957139271405901, "res": {"Yes": 0.9957139271405901, "No": 0.004285988778969893}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975830340771004, "res": {"Yes": 0.9975830340771004, "No": 0.002416908584168141}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999654763299556, "res": {"Yes": 0.9999654763299556, "No": 3.449842110661027e-05}, "ground_truth": 1}, {"key": "37438541", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999984906043415, "res": {"Yes": 0.999984906043415, "No": 1.4977357459636006e-05}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996900881026163, "res": {"Yes": 0.9996900881026163, "No": 0.0003098463588365202}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00039077602038626723, "res": {"No": 0.9996090836976962, "Yes": 0.00039077602038626723}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995247570437253, "res": {"Yes": 0.9995247570437253, "No": 0.0004751768535317445}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993474654933032, "res": {"Yes": 0.9993474654933032, "No": 0.0006524461179705982}, "ground_truth": 1}, {"key": "34652757", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998722739576655, "res": {"Yes": 0.9998722739576655, "No": 0.0001276450746274562}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998392629795794, "res": {"Yes": 0.9998392629795794, "No": 0.00016063272662014577}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9909877777276507, "res": {"Yes": 0.9909877777276507, "No": 0.009012096937307548}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999524876167093, "res": {"Yes": 0.999524876167093, "No": 0.00047501190076565543}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995965795959031, "res": {"Yes": 0.9995965795959031, "No": 0.00040329252130840327}, "ground_truth": 1}, {"key": "31361004", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998798947132175, "res": {"Yes": 0.9998798947132175, "No": 0.0001200446261176873}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996065813694129, "res": {"Yes": 0.9996065813694129, "No": 0.00039339408016500564}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.005819982640355651, "res": {"No": 0.9941799226534282, "Yes": 0.005819982640355651}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9263421871841847, "res": {"Yes": 0.9263421871841847, "No": 0.07365772894732871}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999942038320978, "res": {"Yes": 0.9999942038320978, "No": 5.725297756542608e-06}, "ground_truth": 1}, {"key": "26150727", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997635910190081, "res": {"Yes": 0.9997635910190081, "No": 0.00023631403043870066}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997214221100283, "res": {"Yes": 0.9997214221100283, "No": 0.0002785277117378632}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9993968709554876, "res": {"Yes": 0.9993968709554876, "No": 0.0006030412232349841}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9666928459743135, "res": {"Yes": 0.9666928459743135, "No": 0.0333069564969763}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998782260650476, "res": {"Yes": 0.9998782260650476, "No": 0.00012161563735872662}, "ground_truth": 1}, {"key": "36997402", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998909793831794, "res": {"Yes": 0.9998909793831794, "No": 0.00010890604716985368}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999473581626301, "res": {"Yes": 0.9999473581626301, "No": 5.25492885115176e-05}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976141175528027, "res": {"Yes": 0.9976141175528027, "No": 0.002385873438717832}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 7.1175214791827666e-06}, "ground_truth": 1}, {"key": "37430643", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998337985678036, "res": {"Yes": 0.998337985678036, "No": 0.001661935154261305}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983818302507631, "res": {"Yes": 0.9983818302507631, "No": 0.0016178567491813471}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.35142568941369795, "res": {"No": 0.6485738847174132, "Yes": 0.35142568941369795}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995086869338116, "res": {"Yes": 0.9995086869338116, "No": 0.0004912223408397543}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999269756164101, "res": {"Yes": 0.9999269756164101, "No": 7.291198712685875e-05}, "ground_truth": 1}, {"key": "36964631", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999598739650709, "res": {"Yes": 0.9999598739650709, "No": 3.998764606215369e-05}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999807339855921, "res": {"Yes": 0.9999807339855921, "No": 1.9208800971962316e-05}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.2090202378263895, "res": {"No": 0.7909795934238898, "Yes": 0.2090202378263895}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9908216822771966, "res": {"Yes": 0.9908216822771966, "No": 0.00917828785421511}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9965336472204283, "res": {"Yes": 0.9965336472204283, "No": 0.00346625194468564}, "ground_truth": 1}, {"key": "35502013", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997876610970966, "res": {"Yes": 0.9997876610970966, "No": 0.00021231936036156512}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.10911027619214339, "res": {"No": 0.8908893014737977, "Yes": 0.10911027619214339}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.16992282765010874, "res": {"No": 0.8300769264635872, "Yes": 0.16992282765010874}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999766811478886, "res": {"Yes": 0.9999766811478886, "No": 2.3299911399416476e-05}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995467884813808, "res": {"Yes": 0.9995467884813808, "No": 0.0004529600618746504}, "ground_truth": 1}, {"key": "33987664", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999694378111887, "res": {"Yes": 0.999694378111887, "No": 0.0003055130693717744}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926249193658604, "res": {"Yes": 0.9926249193658604, "No": 0.007374556604406697}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9948704052362588, "res": {"Yes": 0.9948704052362588, "No": 0.005129459428054514}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 4.055023804529291e-06}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.335830657761043e-07}, "ground_truth": 1}, {"key": "35203721", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999837713579866, "res": {"Yes": 0.999837713579866, "No": 0.00016215887393576688}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998943167248708, "res": {"Yes": 0.9998943167248708, "No": 0.00010557425645312434}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9855649450025784, "res": {"Yes": 0.9855649450025784, "No": 0.014435039925348806}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997476326300034, "res": {"Yes": 0.9997476326300034, "No": 0.0002523302027022758}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.1083185621464935e-06}, "ground_truth": 1}, {"key": "39028348", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999071894471658, "res": {"Yes": 0.9999071894471658, "No": 9.269898822140301e-05}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997927857718344, "res": {"Yes": 0.9997927857718344, "No": 0.0002071594026360633}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 6.873036240261355e-05, "res": {"No": 0.999931147444446, "Yes": 6.873036240261355e-05}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9958713739078219, "res": {"Yes": 0.9958713739078219, "No": 0.004128616153293813}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9653423246294986, "res": {"Yes": 0.9653423246294986, "No": 0.034657608358636426}, "ground_truth": 1}, {"key": "37459383", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8240877269454581, "res": {"Yes": 0.8240877269454581, "No": 0.17591210712791935}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9791986792795053, "res": {"Yes": 0.9791986792795053, "No": 0.020801304040193383}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992379489085306, "res": {"Yes": 0.9992379489085306, "No": 0.0007619149672803693}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992008157579735, "res": {"Yes": 0.9992008157579735, "No": 0.0007991237844338135}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994961841178656, "res": {"Yes": 0.9994961841178656, "No": 0.0005037348094983665}, "ground_truth": 1}, {"key": "34020070", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999290019304823, "res": {"Yes": 0.9999290019304823, "No": 7.09287291408625e-05}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998968197434489, "res": {"Yes": 0.9998968197434489, "No": 0.00010312930177475485}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4091395717212771, "res": {"No": 0.5908602577751008, "Yes": 0.4091395717212771}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995350000659958, "res": {"Yes": 0.9995350000659958, "No": 0.0004649363702091589}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981809363055265, "res": {"Yes": 0.9981809363055265, "No": 0.0018190001445125563}, "ground_truth": 1}, {"key": "35176615", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999586819786129, "res": {"Yes": 0.9999586819786129, "No": 4.121008706089629e-05}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9181742789770145, "res": {"Yes": 0.9181742789770145, "No": 0.08182559393668266}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.996338352341574, "res": {"Yes": 0.996338352341574, "No": 0.0036616310088480215}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7934211371731287, "res": {"Yes": 0.7934211371731287, "No": 0.20657872590187504}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993229405082871, "res": {"Yes": 0.9993229405082871, "No": 0.0006770033071882811}, "ground_truth": 1}, {"key": "33296389", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3838045654849508, "res": {"No": 0.6161953740927825, "Yes": 0.3838045654849508}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9591867720218762, "res": {"Yes": 0.9591867720218762, "No": 0.04081314777891593}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 2.5810976235664138e-08}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999814491960682, "res": {"Yes": 0.9999814491960682, "No": 1.8420654421490434e-05}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.906155170923331e-06}, "ground_truth": 1}, {"key": "35399504", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.2357227591622137e-06}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.0469284149487432e-06}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992504521514384, "res": {"Yes": 0.9992504521514384, "No": 0.0007494540124069099}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1706270646893916e-06}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.0566088249884864e-06}, "ground_truth": 1}, {"key": "34807886", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.6975968799511284e-07}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.4082778261282977e-06}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.0051311397329487e-05, "res": {"No": 0.9999797803764193, "Yes": 2.0051311397329487e-05}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996573251264509, "res": {"Yes": 0.9996573251264509, "No": 0.0003425532287255049}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999954033236504, "res": {"Yes": 0.999954033236504, "No": 4.5920197729785765e-05}, "ground_truth": 1}, {"key": "37629813", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981010114646686, "res": {"Yes": 0.9981010114646686, "No": 0.0018989579322075884}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999551060207649, "res": {"Yes": 0.9999551060207649, "No": 4.4853766188377384e-05}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 9.207455699391433e-06, "res": {"No": 0.9999905085465441, "Yes": 9.207455699391433e-06}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 5.3982539460761416e-06}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.9783786605649505e-07}, "ground_truth": 1}, {"key": "28084389", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999959918780326, "res": {"Yes": 0.9999959918780326, "No": 3.96845384032529e-06}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999812107925193, "res": {"Yes": 0.9999812107925193, "No": 1.8728802095758506e-05}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.07631591331400776, "res": {"No": 0.9236837479957803, "Yes": 0.07631591331400776}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 2.706438483874045e-06, "res": {"No": 0.9999970647075079, "Yes": 2.706438483874045e-06}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9871740614939475, "res": {"Yes": 0.9871740614939475, "No": 0.012825800150101613}, "ground_truth": 1}, {"key": "35391734", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.04552679615003124, "res": {"No": 0.9544727902810296, "Yes": 0.04552679615003124}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9837671284668167, "res": {"Yes": 0.9837671284668167, "No": 0.016232806368304976}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0463319959187087, "res": {"No": 0.9536678688597545, "Yes": 0.0463319959187087}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9907532267904151, "res": {"Yes": 0.9907532267904151, "No": 0.00924663571593886}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3302516299792192, "res": {"No": 0.6697479442439863, "Yes": 0.3302516299792192}, "ground_truth": 1}, {"key": "40214591", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8392392686812425, "res": {"Yes": 0.8392392686812425, "No": 0.1607604837920547}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968981671546578, "res": {"Yes": 0.9968981671546578, "No": 0.003101820753713597}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0883828705653427, "res": {"No": 0.911617002966796, "Yes": 0.0883828705653427}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990901426314592, "res": {"Yes": 0.9990901426314592, "No": 0.0009097743101839568}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998379519412053, "res": {"Yes": 0.9998379519412053, "No": 0.00016198205507652924}, "ground_truth": 1}, {"key": "26283171", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999905085465441, "res": {"Yes": 0.9999905085465441, "No": 9.433371462785176e-06}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998480789911653, "res": {"Yes": 0.9998480789911653, "No": 0.00015183851548316435}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996298135659125, "res": {"Yes": 0.9996298135659125, "No": 0.00037002662331841487}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998383095033176, "res": {"Yes": 0.9998383095033176, "No": 0.00016163077348909434}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990507596139651, "res": {"Yes": 0.9990507596139651, "No": 0.0009491516237386075}, "ground_truth": 1}, {"key": "37084030", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986568600210022, "res": {"Yes": 0.9986568600210022, "No": 0.0013430891263725563}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998367601050818, "res": {"Yes": 0.9998367601050818, "No": 0.0001632068733836092}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9901738680309023, "res": {"Yes": 0.9901738680309023, "No": 0.00982598374029925}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992978241809041, "res": {"Yes": 0.9992978241809041, "No": 0.0007021091371474812}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.993758290904346, "res": {"Yes": 0.993758290904346, "No": 0.006241731819064837}, "ground_truth": 1}, {"key": "39027295", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995577466228676, "res": {"Yes": 0.9995577466228676, "No": 0.00044223417875581143}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.972613343203038, "res": {"Yes": 0.972613343203038, "No": 0.027386439413869597}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996607808478312, "res": {"Yes": 0.9996607808478312, "No": 0.00033912445298678984}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.238671180579181e-07}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.26689313606149e-07}, "ground_truth": 1}, {"key": "14018647", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.254757088528342e-07}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.84056949353836e-06}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8548470627686723, "res": {"Yes": 0.8548470627686723, "No": 0.14515272717806368}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.007685982989667107, "res": {"No": 0.9923137551217092, "Yes": 0.007685982989667107}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8624058633286854, "res": {"Yes": 0.8624058633286854, "No": 0.13759399437030953}, "ground_truth": 1}, {"key": "37424289", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979788329146067, "res": {"Yes": 0.9979788329146067, "No": 0.002021144184416118}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7255079563539297, "res": {"Yes": 0.7255079563539297, "No": 0.2744918249716703}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9966374809719647, "res": {"Yes": 0.9966374809719647, "No": 0.003362520289568393}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 4.1898149067750205e-06}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.923853215406224e-06}, "ground_truth": 1}, {"key": "37498031", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 5.158075567858969e-06}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.8893991038559938e-06}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999244261973941, "res": {"Yes": 0.999244261973941, "No": 0.000755687973838737}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998055380101843, "res": {"Yes": 0.9998055380101843, "No": 0.00019437174911129257}, "ground_truth": 1}, {"key": "30104095", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999966549126493, "res": {"Yes": 0.999966549126493, "No": 3.3400088776704045e-05}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979062992555192, "res": {"Yes": 0.9979062992555192, "No": 0.002093671616473197}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9582845404768483, "res": {"Yes": 0.9582845404768483, "No": 0.04171511274028351}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985403543205146, "res": {"Yes": 0.9985403543205146, "No": 0.0014593423353311116}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991422674030854, "res": {"Yes": 0.9991422674030854, "No": 0.0008576311036987242}, "ground_truth": 1}, {"key": "37911407", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997547808246465, "res": {"Yes": 0.997547808246465, "No": 0.0024519996589468618}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.995853528948969, "res": {"Yes": 0.995853528948969, "No": 0.004146408670499571}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997741976971782, "res": {"Yes": 0.9997741976971782, "No": 0.00022567018116353814}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999590395729012, "res": {"Yes": 0.9999590395729012, "No": 4.088035206288958e-05}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "No": 4.835761229418591e-06}, "ground_truth": 1}, {"key": "39177472", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999753699393249, "res": {"Yes": 0.9999753699393249, "No": 2.4551441218719983e-05}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999038520625092, "res": {"Yes": 0.9999038520625092, "No": 9.608993328234333e-05}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.14149485825210778, "res": {"No": 0.8585048236199461, "Yes": 0.14149485825210778}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999023025627055, "res": {"Yes": 0.9999023025627055, "No": 9.765145505849146e-05}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998907410091983, "res": {"Yes": 0.9998907410091983, "No": 0.00010920715225037344}, "ground_truth": 1}, {"key": "32325454", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999909692497968, "res": {"Yes": 0.999909692497968, "No": 9.005814866379083e-05}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9829871688225199, "res": {"Yes": 0.9829871688225199, "No": 0.017012234794282096}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992953226617778, "res": {"Yes": 0.9992953226617778, "No": 0.0007046511502199861}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997286878917885, "res": {"Yes": 0.9997286878917885, "No": 0.00027124936340672734}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998797755275411, "res": {"Yes": 0.9998797755275411, "No": 0.00012010142065669049}, "ground_truth": 1}, {"key": "38395319", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997716949855572, "res": {"Yes": 0.9997716949855572, "No": 0.00022826804124812612}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999946523779904, "res": {"Yes": 0.999946523779904, "No": 5.343054349243537e-05}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9980918686018491, "res": {"Yes": 0.9980918686018491, "No": 0.0019080900710900573}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.989616427401205, "res": {"Yes": 0.989616427401205, "No": 0.01038303556869722}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981888939354883, "res": {"Yes": 0.9981888939354883, "No": 0.0018110447323988837}, "ground_truth": 1}, {"key": "38235895", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995560822006759, "res": {"Yes": 0.9995560822006759, "No": 0.00044364950962635187}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9036175972286049, "res": {"Yes": 0.9036175972286049, "No": 0.09638089323180696}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0003358094405858383, "res": {"No": 0.9996638791012905, "Yes": 0.0003358094405858383}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7672349739259279, "res": {"Yes": 0.7672349739259279, "No": 0.23276486454715054}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9958563670359852, "res": {"Yes": 0.9958563670359852, "No": 0.004143669214362233}, "ground_truth": 1}, {"key": "26543267", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982979403492547, "res": {"Yes": 0.9982979403492547, "No": 0.001701998381111054}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8636855182008571, "res": {"Yes": 0.8636855182008571, "No": 0.13631438589924888}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0035602090996013657, "res": {"No": 0.9964390705008825, "Yes": 0.0035602090996013657}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918496010859643, "res": {"Yes": 0.9918496010859643, "No": 0.008150137342751558}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9453324730787169, "res": {"Yes": 0.9453324730787169, "No": 0.05466655404393999}, "ground_truth": 1}, {"key": "39054728", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9922334794915213, "res": {"Yes": 0.9922334794915213, "No": 0.007766345964931798}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6058858966487425, "res": {"Yes": 0.6058858966487425, "No": 0.39411312169170337}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999877668918251, "res": {"Yes": 0.9999877668918251, "No": 1.212759649680383e-05}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9896159593127456, "res": {"Yes": 0.9896159593127456, "No": 0.010383980467926228}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980074484717271, "res": {"Yes": 0.9980074484717271, "No": 0.0019925170305678413}, "ground_truth": 1}, {"key": "39158443", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9873903049223889, "res": {"Yes": 0.9873903049223889, "No": 0.012609402665905212}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9902358132653034, "res": {"Yes": 0.9902358132653034, "No": 0.009763897736807225}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5512801829881564, "res": {"Yes": 0.5512801829881564, "No": 0.4487196382340298}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.996985012929783, "res": {"Yes": 0.996985012929783, "No": 0.003014954319487068}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9912776255639869, "res": {"Yes": 0.9912776255639869, "No": 0.008722262619549604}, "ground_truth": 1}, {"key": "36254201", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988987346382661, "res": {"Yes": 0.9988987346382661, "No": 0.0011012332398887047}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975997611908598, "res": {"Yes": 0.9975997611908598, "No": 0.002400180586701502}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 5.261328481923604e-06}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999807339855921, "res": {"Yes": 0.9999807339855921, "No": 1.9141879103157452e-05}, "ground_truth": 1}, {"key": "23434347", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.858215037312364e-07}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999866940725246, "res": {"Yes": 0.9999866940725246, "No": 1.321266128290549e-05}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9989025413487074, "res": {"Yes": 0.9989025413487074, "No": 0.001097375345363611}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 6.246070574598469e-06}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982817929108333, "res": {"Yes": 0.9982817929108333, "No": 0.001718031706704265}, "ground_truth": 1}, {"key": "34397620", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999337697473201, "res": {"Yes": 0.9999337697473201, "No": 6.595822307953392e-05}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976749860417069, "res": {"Yes": 0.9976749860417069, "No": 0.0023239554725690797}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.999480345467211, "res": {"Yes": 0.999480345467211, "No": 0.0005196039350329045}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999760851449647, "res": {"Yes": 0.9999760851449647, "No": 2.3862628116016244e-05}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.9379392204082315e-07}, "ground_truth": 1}, {"key": "34340916", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 4.071561761187389e-07}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.2936525753670373e-06}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9987754034310911, "res": {"Yes": 0.9987754034310911, "No": 0.0012245779805679135}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9574579895820571, "res": {"Yes": 0.9574579895820571, "No": 0.04254189176225763}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998980116571646, "res": {"Yes": 0.9998980116571646, "No": 0.00010189945154873315}, "ground_truth": 1}, {"key": "30375089", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999253068846696, "res": {"Yes": 0.9999253068846696, "No": 7.462477509935767e-05}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999983356420506, "res": {"Yes": 0.999983356420506, "No": 1.6489924498939576e-05}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.15188591487195569, "res": {"No": 0.848111988499375, "Yes": 0.15188591487195569}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986768266668005, "res": {"Yes": 0.9986768266668005, "No": 0.0013231305784061854}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978374508859517, "res": {"Yes": 0.9978374508859517, "No": 0.002162513665222869}, "ground_truth": 1}, {"key": "35807797", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980053107420619, "res": {"Yes": 0.9980053107420619, "No": 0.001994695842825637}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.5333438931298048, "res": {"Yes": 0.5333438931298048, "No": 0.4666553510924088}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9987450807705152, "res": {"Yes": 0.9987450807705152, "No": 0.0012548958473520403}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.970262476854169e-07}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.362858610419247e-07}, "ground_truth": 1}, {"key": "34188172", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999219694395426, "res": {"Yes": 0.9999219694395426, "No": 7.79754908908502e-05}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999983356420506, "res": {"Yes": 0.999983356420506, "No": 1.6551612977732918e-05}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.991705253832317, "res": {"Yes": 0.991705253832317, "No": 0.008294662859394581}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.4924997627972789, "res": {"No": 0.5074999330435982, "Yes": 0.4924997627972789}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5702246345996674, "res": {"Yes": 0.5702246345996674, "No": 0.4297751345430531}, "ground_truth": 1}, {"key": "37075567", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992074785511471, "res": {"Yes": 0.9992074785511471, "No": 0.0007925230242131249}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0708541032439141, "res": {"No": 0.9291456601943658, "Yes": 0.0708541032439141}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7909170926858541, "res": {"Yes": 0.7909170926858541, "No": 0.20908266058865907}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999742971333243, "res": {"Yes": 0.9999742971333243, "No": 2.558255203448697e-05}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 3.3715836438115843e-06}, "ground_truth": 1}, {"key": "35559735", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999985978860297, "res": {"Yes": 0.999985978860297, "No": 1.396157003867758e-05}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997861154465632, "res": {"Yes": 0.9997861154465632, "No": 0.00021375415750943952}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.698596679289223, "res": {"Yes": 0.698596679289223, "No": 0.301402891280696}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998661953626391, "res": {"Yes": 0.9998661953626391, "No": 0.00013378292958317358}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999912237625114, "res": {"Yes": 0.9999912237625114, "No": 8.645375536833262e-06}, "ground_truth": 1}, {"key": "33005019", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 7.943504557873037e-06}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999565364005696, "res": {"Yes": 0.9999565364005696, "No": 4.334214806407223e-05}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.4694798064069914e-05, "res": {"No": 0.9999847868417213, "Yes": 1.4694798064069914e-05}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999815683978641, "res": {"Yes": 0.9999815683978641, "No": 1.8340029181456636e-05}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991370306124431, "res": {"Yes": 0.9991370306124431, "No": 0.0008629230815462122}, "ground_truth": 1}, {"key": "30808252", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998687410799763, "res": {"Yes": 0.998687410799763, "No": 0.0013122818327259602}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8689088412214994, "res": {"Yes": 0.8689088412214994, "No": 0.13109007456039765}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9964957629201932, "res": {"Yes": 0.9964957629201932, "No": 0.0035042757963548625}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8392187747085338, "res": {"Yes": 0.8392187747085338, "No": 0.1607810785290932}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993494905730591, "res": {"Yes": 0.9993494905730591, "No": 0.0006504741496512697}, "ground_truth": 1}, {"key": "15159017", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983497491638257, "res": {"Yes": 0.9983497491638257, "No": 0.0016501913963648395}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999688139202959, "res": {"Yes": 0.9999688139202959, "No": 3.113400275070822e-05}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9779972243884173, "res": {"Yes": 0.9779972243884173, "No": 0.022002691328329913}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999614235510903, "res": {"Yes": 0.9999614235510903, "No": 3.8551135620889225e-05}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999052823659984, "res": {"Yes": 0.9999052823659984, "No": 9.467907908227338e-05}, "ground_truth": 1}, {"key": "24493400", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994611711504329, "res": {"Yes": 0.9994611711504329, "No": 0.0005387117051504094}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999905085465441, "res": {"Yes": 0.9999905085465441, "No": 9.44464163526578e-06}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 5.797209325337599e-07, "res": {"No": 0.9999993295729247, "Yes": 5.797209325337599e-07}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.99951534443347, "res": {"Yes": 0.99951534443347, "No": 0.0004845848562839889}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990962128918158, "res": {"Yes": 0.9990962128918158, "No": 0.0009037059744313848}, "ground_truth": 1}, {"key": "37791071", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.3753043056279383e-06}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999874092852638, "res": {"Yes": 0.9999874092852638, "No": 1.2528894765623e-05}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.45962552367896053, "res": {"No": 0.5403742953484015, "Yes": 0.45962552367896053}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8446720951617963, "res": {"Yes": 0.8446720951617963, "No": 0.15532748258434392}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992310477953912, "res": {"Yes": 0.9992310477953912, "No": 0.0007688470522367287}, "ground_truth": 1}, {"key": "33528627", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9855919399961168, "res": {"Yes": 0.9855919399961168, "No": 0.014407923221535586}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.988889516436123, "res": {"Yes": 0.988889516436123, "No": 0.01111044080166737}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9995348808814496, "res": {"Yes": 0.9995348808814496, "No": 0.00046505843202625127}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999027793262361, "res": {"Yes": 0.9999027793262361, "No": 9.717287006647809e-05}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.614749795343902e-07}, "ground_truth": 1}, {"key": "39925662", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998986076205548, "res": {"Yes": 0.9998986076205548, "No": 0.00010133831350529257}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999738221619941, "res": {"Yes": 0.999738221619941, "No": 0.00026171877723726395}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.132447653809637e-05, "res": {"No": 0.9999275715930637, "Yes": 7.132447653809637e-05}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993818751179455, "res": {"Yes": 0.9993818751179455, "No": 0.0006179120651697576}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897765172382808, "res": {"Yes": 0.9897765172382808, "No": 0.010223050043826303}, "ground_truth": 1}, {"key": "29213416", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8688396440768513, "res": {"Yes": 0.8688396440768513, "No": 0.1311596494105348}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974943123890383, "res": {"Yes": 0.9974943123890383, "No": 0.0025055174472715896}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.10072233564794253, "res": {"No": 0.8992775052465998, "Yes": 0.10072233564794253}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997172511609895, "res": {"Yes": 0.9997172511609895, "No": 0.00028264609234637876}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999030177130861, "res": {"Yes": 0.9999030177130861, "No": 9.685863631661276e-05}, "ground_truth": 1}, {"key": "34492745", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999531988512079, "res": {"Yes": 0.9999531988512079, "No": 4.669135631664263e-05}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996686420111889, "res": {"Yes": 0.9996686420111889, "No": 0.0003312762296333238}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0002041419444404987, "res": {"No": 0.9997956460731088, "Yes": 0.0002041419444404987}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997806332644017, "res": {"Yes": 0.9997806332644017, "No": 0.0002193201551532575}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999217310531738, "res": {"Yes": 0.9999217310531738, "No": 7.817442577705625e-05}, "ground_truth": 1}, {"key": "34191937", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975424683873396, "res": {"Yes": 0.9975424683873396, "No": 0.002457511564882017}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989315588900942, "res": {"Yes": 0.9989315588900942, "No": 0.0010682505164350546}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.3272480734554165e-06, "res": {"No": 0.9999965878943212, "Yes": 3.3272480734554165e-06}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.4074593523951552, "res": {"No": 0.5925401171469841, "Yes": 0.4074593523951552}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8946784043754906, "res": {"Yes": 0.8946784043754906, "No": 0.10532115266567384}, "ground_truth": 1}, {"key": "34933372", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9447691500427936, "res": {"Yes": 0.9447691500427936, "No": 0.05523056307653343}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977785835057207, "res": {"Yes": 0.9977785835057207, "No": 0.002221294821935155}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0007993401365241937, "res": {"No": 0.9992005775085594, "Yes": 0.0007993401365241937}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 5.0589682430086654e-06, "res": {"No": 0.9999947998470209, "Yes": 5.0589682430086654e-06}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.01306005687817196, "res": {"No": 0.9869398003124651, "Yes": 0.01306005687817196}, "ground_truth": 1}, {"key": "38714379", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 2.0418446734505832e-07, "res": {"No": 0.9999996871837189, "Yes": 2.0418446734505832e-07}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.00042520570260654413, "res": {"No": 0.9995746588631997, "Yes": 0.00042520570260654413}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9519066268889781, "res": {"Yes": 0.9519066268889781, "No": 0.04809326656131268}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8845192074641898, "res": {"Yes": 0.8845192074641898, "No": 0.11548056864656131}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.002533645764170981, "res": {"No": 0.9974663135156476, "Yes": 0.002533645764170981}, "ground_truth": 1}, {"key": "39220660", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.1487253890514511, "res": {"No": 0.8512742537505622, "Yes": 0.1487253890514511}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.000408352616780516, "res": {"No": 0.9995915750781753, "Yes": 0.000408352616780516}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.05487474799190439, "res": {"No": 0.9451248914086436, "Yes": 0.05487474799190439}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6051473481159824, "res": {"Yes": 0.6051473481159824, "No": 0.3948523939980278}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.011962673723115581, "res": {"No": 0.9880371487418358, "Yes": 0.011962673723115581}, "ground_truth": 1}, {"key": "41028780", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.010347156824038358, "res": {"No": 0.9896527380836764, "Yes": 0.010347156824038358}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995768036328547, "res": {"Yes": 0.9995768036328547, "No": 0.00042318547493349257}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.004797506381074648, "res": {"No": 0.9952024153505664, "Yes": 0.004797506381074648}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.380302446484093e-06}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999856212553752, "res": {"Yes": 0.9999856212553752, "No": 1.4259873699427376e-05}, "ground_truth": 1}, {"key": "39457108", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999938462231346, "res": {"Yes": 0.9999938462231346, "No": 6.072088153152275e-06}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9570411638757028, "res": {"Yes": 0.9570411638757028, "No": 0.042958377379098814}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 8.256893811822785e-05, "res": {"No": 0.9999172016779703, "Yes": 8.256893811822785e-05}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966416245009052, "res": {"Yes": 0.9966416245009052, "No": 0.0033583939847681133}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9951360783637647, "res": {"Yes": 0.9951360783637647, "No": 0.004863871272627508}, "ground_truth": 1}, {"key": "38288018", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8881112129706561, "res": {"Yes": 0.8881112129706561, "No": 0.11188849320569882}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9832323406643262, "res": {"Yes": 0.9832323406643262, "No": 0.01676742940881232}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0002732001175491442, "res": {"No": 0.9997263044414649, "Yes": 0.0002732001175491442}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999704827216435, "res": {"Yes": 0.9999704827216435, "No": 2.9460740837866482e-05}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998789411982082, "res": {"Yes": 0.9998789411982082, "No": 0.00012100774574438064}, "ground_truth": 1}, {"key": "40106293", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997801565491138, "res": {"Yes": 0.9997801565491138, "No": 0.00021974187410267064}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988788680385635, "res": {"Yes": 0.9988788680385635, "No": 0.0011210304186267614}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.003700269959376155, "res": {"No": 0.996299658997779, "Yes": 0.003700269959376155}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994574815563783, "res": {"Yes": 0.9994574815563783, "No": 0.000542406954338361}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999241149394169, "res": {"Yes": 0.9999241149394169, "No": 7.582609980331956e-05}, "ground_truth": 1}, {"key": "39948797", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999505764963816, "res": {"Yes": 0.9999505764963816, "No": 4.930110812439408e-05}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998192401790255, "res": {"Yes": 0.9998192401790255, "No": 0.0001806198520978602}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.07846694701476287, "res": {"No": 0.9215319267243481, "Yes": 0.07846694701476287}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9428932776283192, "res": {"Yes": 0.9428932776283192, "No": 0.05710606219026424}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998691750683278, "res": {"Yes": 0.9998691750683278, "No": 0.00013077045809410992}, "ground_truth": 1}, {"key": "31853399", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999727475263555, "res": {"Yes": 0.9999727475263555, "No": 2.715234889111624e-05}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996298135659125, "res": {"Yes": 0.9996298135659125, "No": 0.0003701169948537446}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3282196270538876, "res": {"No": 0.6717797803361792, "Yes": 0.3282196270538876}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9266214287989994, "res": {"Yes": 0.9266214287989994, "No": 0.07337840474017646}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957409948963161, "res": {"Yes": 0.9957409948963161, "No": 0.004258935638653191}, "ground_truth": 1}, {"key": "35273252", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9903589082173825, "res": {"Yes": 0.9903589082173825, "No": 0.009641004254573215}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0005073933585829253, "res": {"No": 0.9994924906764456, "Yes": 0.0005073933585829253}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997593007127227, "res": {"Yes": 0.9997593007127227, "No": 0.00024059272302438998}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 1.972452234964873e-06}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999899125338788, "res": {"Yes": 0.9999899125338788, "No": 9.694420927863715e-06}, "ground_truth": 1}, {"key": "37130459", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999983356420506, "res": {"Yes": 0.999983356420506, "No": 1.639915088138213e-05}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999950338100193, "res": {"Yes": 0.999950338100193, "No": 4.942837035166676e-05}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9976993111509193, "res": {"Yes": 0.9976993111509193, "No": 0.0023006819499872795}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.2266329531485547e-06}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999949190499081, "res": {"Yes": 0.9999949190499081, "No": 5.054164505314495e-06}, "ground_truth": 1}, {"key": "21734003", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0419048776895233e-07}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.1197971557470183e-06}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9991727285079002, "res": {"Yes": 0.9991727285079002, "No": 0.0008272334643994859}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.5975133513921107, "res": {"Yes": 0.5975133513921107, "No": 0.4024865696537887}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999653571300781, "res": {"Yes": 0.9999653571300781, "No": 3.453542324363851e-05}, "ground_truth": 1}, {"key": "33990737", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999828796125555, "res": {"Yes": 0.9999828796125555, "No": 1.703068229149053e-05}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.820703516170681, "res": {"Yes": 0.820703516170681, "No": 0.1792963420252437}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0008712614098684733, "res": {"No": 0.9991287010016021, "Yes": 0.0008712614098684733}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.973059549223818, "res": {"Yes": 0.973059549223818, "No": 0.026940390848353032}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9924696743486924, "res": {"Yes": 0.9924696743486924, "No": 0.0075302834132260224}, "ground_truth": 1}, {"key": "34559912", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987673147813623, "res": {"Yes": 0.9987673147813623, "No": 0.0012326006402571413}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985297827298907, "res": {"Yes": 0.9985297827298907, "No": 0.0014701774076458945}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7698795845999746, "res": {"Yes": 0.7698795845999746, "No": 0.23011983131912045}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953257016951301, "res": {"Yes": 0.9953257016951301, "No": 0.004674004874355754}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998950318795388, "res": {"Yes": 0.9998950318795388, "No": 0.00010470360593928494}, "ground_truth": 1}, {"key": "39820439", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997626376149095, "res": {"Yes": 0.9997626376149095, "No": 0.00023722677921895078}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993102017202969, "res": {"Yes": 0.9993102017202969, "No": 0.0006897574802874907}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9920674292232659, "res": {"Yes": 0.9920674292232659, "No": 0.007932468543788542}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973420338638138, "res": {"Yes": 0.9973420338638138, "No": 0.002657895592331548}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9851347580151901, "res": {"Yes": 0.9851347580151901, "No": 0.014865190608161353}, "ground_truth": 1}, {"key": "34759328", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999156521574794, "res": {"Yes": 0.9999156521574794, "No": 8.431405338749773e-05}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.00021642510623492883, "res": {"No": 0.999783493530909, "Yes": 0.00021642510623492883}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9217502505942554, "res": {"Yes": 0.9217502505942554, "No": 0.07824967522889066}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9942765623344823, "res": {"Yes": 0.9942765623344823, "No": 0.005723384229679841}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991989136411431, "res": {"Yes": 0.9991989136411431, "No": 0.0008009932803558137}, "ground_truth": 1}, {"key": "36939137", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8566126724635589, "res": {"Yes": 0.8566126724635589, "No": 0.14338715922205206}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.4641205984844219, "res": {"No": 0.5358792891949593, "Yes": 0.4641205984844219}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8587770213765077, "res": {"Yes": 0.8587770213765077, "No": 0.14122258730034243}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985550957807244, "res": {"Yes": 0.9985550957807244, "No": 0.0014447957744018628}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994179465188867, "res": {"Yes": 0.9994179465188867, "No": 0.0005820035561663751}, "ground_truth": 1}, {"key": "35851522", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981617009453143, "res": {"Yes": 0.9981617009453143, "No": 0.0018383079919814882}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998405740096741, "res": {"Yes": 0.9998405740096741, "No": 0.00015938896856407893}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.054864470914083904, "res": {"No": 0.9451354324453395, "Yes": 0.054864470914083904}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.99997334352929, "res": {"Yes": 0.99997334352929, "No": 2.656136079279512e-05}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998788220126454, "res": {"Yes": 0.9998788220126454, "No": 0.0001211245745798708}, "ground_truth": 1}, {"key": "22412782", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 5.319980074566353e-06}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990007977353684, "res": {"Yes": 0.9990007977353684, "No": 0.0009992200832754114}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 8.608606554398416e-05, "res": {"No": 0.9999137450601713, "Yes": 8.608606554398416e-05}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9019425705137192, "res": {"Yes": 0.9019425705137192, "No": 0.09805700360273348}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9153737474025339, "res": {"Yes": 0.9153737474025339, "No": 0.08462590953320592}, "ground_truth": 1}, {"key": "38579227", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9583727852967219, "res": {"Yes": 0.9583727852967219, "No": 0.04162676205191281}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9342153442807936, "res": {"Yes": 0.9342153442807936, "No": 0.06578415994172453}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8461692925482784, "res": {"Yes": 0.8461692925482784, "No": 0.15383047416854984}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.99820469379362, "res": {"Yes": 0.99820469379362, "No": 0.0017952399304240396}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1427277513551572, "res": {"No": 0.8572718667025075, "Yes": 0.1427277513551572}, "ground_truth": 1}, {"key": "37206995", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995830238398163, "res": {"Yes": 0.995830238398163, "No": 0.004169754991222889}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.02304640930958992, "res": {"No": 0.9769535505569554, "Yes": 0.02304640930958992}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9358260816241627, "res": {"Yes": 0.9358260816241627, "No": 0.06417344391790092}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.3185281579090363, "res": {"No": 0.6814716419067669, "Yes": 0.3185281579090363}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.22093337356481957, "res": {"No": 0.7790661890755469, "Yes": 0.22093337356481957}, "ground_truth": 1}, {"key": "38700847", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8941933562816549, "res": {"Yes": 0.8941933562816549, "No": 0.10580632275015035}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9300202117560256, "res": {"Yes": 0.9300202117560256, "No": 0.06997939061979404}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999886013079656, "res": {"Yes": 0.9999886013079656, "No": 1.1324364533519642e-05}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996887772698445, "res": {"Yes": 0.9996887772698445, "No": 0.0003111061144383647}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999883629027115, "res": {"Yes": 0.9999883629027115, "No": 1.1541295978810891e-05}, "ground_truth": 1}, {"key": "20246590", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998468908124082, "res": {"Yes": 0.9998468908124082, "No": 0.00015295145109972156}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9674939087148339, "res": {"Yes": 0.9674939087148339, "No": 0.0325057689254566}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0019515623930744348, "res": {"No": 0.9980484156220788, "Yes": 0.0019515623930744348}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997668087733017, "res": {"Yes": 0.9997668087733017, "No": 0.00023306294282505178}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999748931371826, "res": {"Yes": 0.9999748931371826, "No": 2.5071896511788073e-05}, "ground_truth": 1}, {"key": "39141360", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.983169081530596, "res": {"Yes": 0.983169081530596, "No": 0.016830879740209838}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.990686066896301, "res": {"Yes": 0.990686066896301, "No": 0.00931383546425937}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.451590763387174, "res": {"No": 0.5484086654980802, "Yes": 0.451590763387174}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.515361625138399, "res": {"Yes": 0.515361625138399, "No": 0.48463819312161194}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9940776698461611, "res": {"Yes": 0.9940776698461611, "No": 0.005922219410035279}, "ground_truth": 1}, {"key": "37906226", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7757633474006421, "res": {"Yes": 0.7757633474006421, "No": 0.22423651805647915}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9622618769501888, "res": {"Yes": 0.9622618769501888, "No": 0.037737793747925436}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.3431001261088675e-06, "res": {"No": 0.9999965878943212, "Yes": 3.3431001261088675e-06}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986348629484767, "res": {"Yes": 0.9986348629484767, "No": 0.0013650479810644873}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.6049758005358628e-06}, "ground_truth": 1}, {"key": "16201033", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998458181472437, "res": {"Yes": 0.9998458181472437, "No": 0.0001541267873178072}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996450551078482, "res": {"Yes": 0.9996450551078482, "No": 0.0003548966278668865}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.2622583589164663e-05, "res": {"No": 0.9999872900832717, "Yes": 1.2622583589164663e-05}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.3869769880977587, "res": {"No": 0.6130230143191635, "Yes": 0.3869769880977587}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9855323144729994, "res": {"Yes": 0.9855323144729994, "No": 0.014467633456304823}, "ground_truth": 1}, {"key": "36469022", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.05478579686045631, "res": {"No": 0.9452140265595765, "Yes": 0.05478579686045631}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.1303182817015136, "res": {"No": 0.8696816881256519, "Yes": 0.1303182817015136}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.51021945376772e-06, "res": {"No": 0.9999961110815618, "Yes": 3.51021945376772e-06}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.4091451319658385, "res": {"No": 0.5908547628002551, "Yes": 0.4091451319658385}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9033810340041485, "res": {"Yes": 0.9033810340041485, "No": 0.0966188424854084}, "ground_truth": 1}, {"key": "31295270", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997399545854076, "res": {"Yes": 0.997399545854076, "No": 0.002600430423906124}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8043518107111786, "res": {"Yes": 0.8043518107111786, "No": 0.19564808857256327}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0007107479441110954, "res": {"No": 0.9992891321163796, "Yes": 0.0007107479441110954}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999704827216435, "res": {"Yes": 0.9999704827216435, "No": 2.9469609855578894e-05}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999886013079656, "res": {"Yes": 0.9999886013079656, "No": 1.1264740728645634e-05}, "ground_truth": 1}, {"key": "35360689", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999818067994983, "res": {"Yes": 0.9999818067994983, "No": 1.8078774271795233e-05}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9817002673009068, "res": {"Yes": 0.9817002673009068, "No": 0.0182993269225444}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.008973037782387507, "res": {"No": 0.9910268898140875, "Yes": 0.008973037782387507}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998364025435236, "res": {"Yes": 0.9998364025435236, "No": 0.00016352952547260094}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992201004798462, "res": {"Yes": 0.9992201004798462, "No": 0.0007798177651503796}, "ground_truth": 1}, {"key": "29202793", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994185421621662, "res": {"Yes": 0.9994185421621662, "No": 0.0005813334741110409}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997968378602231, "res": {"Yes": 0.9997968378602231, "No": 0.0002030851923924812}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.68124202637204e-05, "res": {"No": 0.999952483661937, "Yes": 4.68124202637204e-05}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 7.706834695627795e-06}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999710787232282, "res": {"Yes": 0.9999710787232282, "No": 2.8810158710919532e-05}, "ground_truth": 1}, {"key": "35999008", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967763934129661, "res": {"Yes": 0.9967763934129661, "No": 0.003223624278679692}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986724292028113, "res": {"Yes": 0.9986724292028113, "No": 0.001327417040278276}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.003961329411812887, "res": {"No": 0.9960385816015884, "Yes": 0.003961329411812887}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999826412106655, "res": {"Yes": 0.9999826412106655, "No": 1.7230525650941112e-05}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999541524340317, "res": {"Yes": 0.9999541524340317, "No": 4.5728080308353114e-05}, "ground_truth": 1}, {"key": "31797119", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999902701413353, "res": {"Yes": 0.9999902701413353, "No": 9.612752073166332e-06}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999418303950785, "res": {"Yes": 0.999418303950785, "No": 0.0005815666355799096}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9658848623376336, "res": {"Yes": 0.9658848623376336, "No": 0.03411500029730371}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9904584254705519, "res": {"Yes": 0.9904584254705519, "No": 0.009541390971594182}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.009464370544678262, "res": {"No": 0.9905352721780325, "Yes": 0.009464370544678262}, "ground_truth": 1}, {"key": "26711893", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999283060723865, "res": {"Yes": 0.999283060723865, "No": 0.0007168482396135426}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9881133787483959, "res": {"Yes": 0.9881133787483959, "No": 0.01188649377891594}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9970870361364584, "res": {"Yes": 0.9970870361364584, "No": 0.002912886980719629}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996949702810272, "res": {"Yes": 0.9996949702810272, "No": 0.0003049380246113335}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999682179220609, "res": {"Yes": 0.9999682179220609, "No": 3.1659066154943824e-05}, "ground_truth": 1}, {"key": "35348288", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999298363015874, "res": {"Yes": 0.9999298363015874, "No": 7.005240171811858e-05}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999614235510903, "res": {"Yes": 0.9999614235510903, "No": 3.848958010212559e-05}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996293369325348, "res": {"Yes": 0.9996293369325348, "No": 0.00037057029541879763}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972727930042032, "res": {"Yes": 0.9972727930042032, "No": 0.0027272112828331317}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995753737792429, "res": {"Yes": 0.9995753737792429, "No": 0.00042449115020661384}, "ground_truth": 1}, {"key": "38124131", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999312666422455, "res": {"Yes": 0.9999312666422455, "No": 6.860281425146942e-05}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996243322408098, "res": {"Yes": 0.9996243322408098, "No": 0.0003756351242349094}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9583899719499843, "res": {"Yes": 0.9583899719499843, "No": 0.041609881660799306}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9892986314557201, "res": {"Yes": 0.9892986314557201, "No": 0.010701308021345769}, "ground_truth": 1}, {"key": "20285901", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996301710335977, "res": {"Yes": 0.9996301710335977, "No": 0.00036980785426976825}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 1.7742494083715975e-05, "res": {"No": 0.9999818067994983, "Yes": 1.7742494083715975e-05}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9919105848497011, "res": {"Yes": 0.9919105848497011, "No": 0.008089184147746842}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978120443720114, "res": {"Yes": 0.9978120443720114, "No": 0.00218792297155214}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999870516788303, "res": {"Yes": 0.9999870516788303, "No": 1.2927535326717716e-05}, "ground_truth": 1}, {"key": "35633632", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.00564374740838483, "res": {"No": 0.9943561079395458, "Yes": 0.00564374740838483}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 3.3563650498226176e-06}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999809723900273, "res": {"Yes": 0.9999809723900273, "No": 1.900153105884345e-05}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.998959159250891, "res": {"Yes": 0.998959159250891, "No": 0.001040826052347284}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993015095581035, "res": {"Yes": 0.9993015095581035, "No": 0.000698388094783187}, "ground_truth": 1}, {"key": "10741274", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998658378005529, "res": {"Yes": 0.9998658378005529, "No": 0.00013408213473557924}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990738414198521, "res": {"Yes": 0.9990738414198521, "No": 0.0009260729006620095}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.24466591775188845, "res": {"No": 0.7553336853658023, "Yes": 0.24466591775188845}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9334321566192602, "res": {"Yes": 0.9334321566192602, "No": 0.06656725426843575}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994211594628599, "res": {"Yes": 0.9994211594628599, "No": 0.000578635970031342}, "ground_truth": 1}, {"key": "30605795", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996250472123729, "res": {"Yes": 0.9996250472123729, "No": 0.0003748467433230458}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9934061812608791, "res": {"Yes": 0.9934061812608791, "No": 0.006593541959298383}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9901465168359086, "res": {"Yes": 0.9901465168359086, "No": 0.009853450308566488}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.997293301033053, "res": {"Yes": 0.997293301033053, "No": 0.002706621772849143}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 5.256795170909939e-06}, "ground_truth": 1}, {"key": "30539722", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9970809888219228, "res": {"Yes": 0.9970809888219228, "No": 0.0029189754428778523}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938709706008746, "res": {"Yes": 0.9938709706008746, "No": 0.006128944069934182}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.014530424302536291, "res": {"No": 0.9854690946028251, "Yes": 0.014530424302536291}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999949190499081, "res": {"Yes": 0.9999949190499081, "No": 4.9956451252462e-06}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999790651681366, "res": {"Yes": 0.9999790651681366, "No": 2.089996653899113e-05}, "ground_truth": 1}, {"key": "18639299", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999828796125555, "res": {"Yes": 0.9999828796125555, "No": 1.7077666512250078e-05}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999664299234876, "res": {"Yes": 0.9999664299234876, "No": 3.3470101840697526e-05}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998980116571646, "res": {"Yes": 0.9998980116571646, "No": 0.00010190067434948852}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999591587700257, "res": {"Yes": 0.9999591587700257, "No": 4.069843167670748e-05}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999483117398842, "res": {"Yes": 0.9999483117398842, "No": 5.158508384694282e-05}, "ground_truth": 1}, {"key": "39773552", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999642843338196, "res": {"Yes": 0.9999642843338196, "No": 3.5630305647797805e-05}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999437822452772, "res": {"Yes": 0.9999437822452772, "No": 5.61335996767613e-05}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.304332429887676e-05, "res": {"No": 0.999966549126493, "Yes": 3.304332429887676e-05}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999851444463448, "res": {"Yes": 0.9999851444463448, "No": 1.4809539855776886e-05}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999193471666147, "res": {"Yes": 0.9999193471666147, "No": 8.052321741743399e-05}, "ground_truth": 1}, {"key": "34086410", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.776556282327409e-06}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995525076845663, "res": {"Yes": 0.9995525076845663, "No": 0.00044741745292595626}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9580326022635438, "res": {"Yes": 0.9580326022635438, "No": 0.04196732246810167}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999579667834204, "res": {"Yes": 0.9999579667834204, "No": 4.196662073761987e-05}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999962302846054, "res": {"Yes": 0.9999962302846054, "No": 3.6500744256088184e-06}, "ground_truth": 1}, {"key": "35454652", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999939654258081, "res": {"Yes": 0.9999939654258081, "No": 5.98867423530031e-06}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.719428100441191e-07}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 1.4890756494683575e-06, "res": {"No": 0.9999981375378344, "Yes": 1.4890756494683575e-06}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999450934134217, "res": {"Yes": 0.9999450934134217, "No": 5.4821176838485345e-05}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999923042191404, "res": {"Yes": 0.999923042191404, "No": 7.687421832648898e-05}, "ground_truth": 1}, {"key": "36158310", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998709628860046, "res": {"Yes": 0.9998709628860046, "No": 0.00012899395695628414}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999620195462757, "res": {"Yes": 0.9999620195462757, "No": 3.782266620481409e-05}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.7492307186317236e-07, "res": {"No": 0.9999993295729247, "Yes": 4.7492307186317236e-07}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9690824461157501, "res": {"Yes": 0.9690824461157501, "No": 0.03091693459669117}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990462377805931, "res": {"Yes": 0.9990462377805931, "No": 0.0009537535053863902}, "ground_truth": 1}, {"key": "35688387", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9862524260024836, "res": {"Yes": 0.9862524260024836, "No": 0.013747384378268938}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6393821497078498, "res": {"Yes": 0.6393821497078498, "No": 0.3606172256289945}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 5.62069018436003e-05, "res": {"No": 0.9999436630499856, "Yes": 5.62069018436003e-05}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.878596865493722e-07}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.086144727589164e-07}, "ground_truth": 1}, {"key": "34209292", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999891973193493, "res": {"Yes": 0.9999891973193493, "No": 1.0685733755521866e-05}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.041857929496676e-07}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3027718280170431, "res": {"No": 0.6972280944744826, "Yes": 0.3027718280170431}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983738673887166, "res": {"Yes": 0.9983738673887166, "No": 0.0016261288946432316}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.003281760784262e-07}, "ground_truth": 1}, {"key": "25037859", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 6.4942655079653365e-06}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999893165220688, "res": {"Yes": 0.9999893165220688, "No": 1.0565875947669469e-05}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.01826160519421795, "res": {"No": 0.9817379525523748, "Yes": 0.01826160519421795}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997844469448988, "res": {"Yes": 0.9997844469448988, "No": 0.00021543356047850392}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999394911657333, "res": {"Yes": 0.9999394911657333, "No": 6.036494513910101e-05}, "ground_truth": 1}, {"key": "36412121", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999692907215395, "res": {"Yes": 0.9999692907215395, "No": 3.0671827368071574e-05}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9933407623529049, "res": {"Yes": 0.9933407623529049, "No": 0.006659079614505396}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9994442608402433, "res": {"Yes": 0.9994442608402433, "No": 0.000555660700995561}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998844239234294, "res": {"Yes": 0.9998844239234294, "No": 0.00011544145957471227}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996552993429273, "res": {"Yes": 0.9996552993429273, "No": 0.0003446786583676658}, "ground_truth": 1}, {"key": "34909172", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998062530813588, "res": {"Yes": 0.9998062530813588, "No": 0.00019363918533747164}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999903893441826, "res": {"Yes": 0.9999903893441826, "No": 9.556997015160438e-06}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.2898534582779058, "res": {"No": 0.7101461180857083, "Yes": 0.2898534582779058}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6745813788378117, "res": {"Yes": 0.6745813788378117, "No": 0.32541825030954097}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.13635924071042158, "res": {"No": 0.8636401672649956, "Yes": 0.13635924071042158}, "ground_truth": 1}, {"key": "39011806", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7946413803505059, "res": {"Yes": 0.7946413803505059, "No": 0.20535840690939688}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.003365819867316821, "res": {"No": 0.9966339277663515, "Yes": 0.003365819867316821}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9954142778787969, "res": {"Yes": 0.9954142778787969, "No": 0.004585654662192976}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999756083404814, "res": {"Yes": 0.9999756083404814, "No": 2.4360563090893914e-05}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999087389545426, "res": {"Yes": 0.9999087389545426, "No": 9.117698957595679e-05}, "ground_truth": 1}, {"key": "33096163", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999772771531678, "res": {"Yes": 0.9999772771531678, "No": 2.269463511399073e-05}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999972151525776, "res": {"Yes": 0.999972151525776, "No": 2.7734179179763355e-05}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9979870267536537, "res": {"Yes": 0.9979870267536537, "No": 0.00201292779966402}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999726283256111, "res": {"Yes": 0.9999726283256111, "No": 2.7307968825430035e-05}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.5100544773303683e-06}, "ground_truth": 1}, {"key": "38762205", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999820452021894, "res": {"Yes": 0.9999820452021894, "No": 1.7829485185603277e-05}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999747739361825, "res": {"Yes": 0.9999747739361825, "No": 2.5177319211699635e-05}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0007807933810813526, "res": {"No": 0.9992191476240126, "Yes": 0.0007807933810813526}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999796611748367, "res": {"Yes": 0.9999796611748367, "No": 2.0300906860161044e-05}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999346041184038, "res": {"Yes": 0.9999346041184038, "No": 6.534863536862398e-05}, "ground_truth": 1}, {"key": "35519177", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989727198139334, "res": {"Yes": 0.9989727198139334, "No": 0.0010272674122412955}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9912369760846005, "res": {"Yes": 0.9912369760846005, "No": 0.008762824451776905}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998381903125998, "res": {"Yes": 0.9998381903125998, "No": 0.00016165663648183673}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.108232724463084e-07}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.0051249594263764e-07}, "ground_truth": 1}, {"key": "36192531", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 4.0848444569717037e-07}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.4691987203169745e-07}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998392629795794, "res": {"Yes": 0.9998392629795794, "No": 0.0001606939393492726}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999717939237989, "res": {"Yes": 0.9999717939237989, "No": 2.8097302013180763e-05}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999682179220609, "res": {"Yes": 0.9999682179220609, "No": 3.174637539277183e-05}, "ground_truth": 1}, {"key": "33160852", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1481116599867423e-06}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997053378914091, "res": {"Yes": 0.9997053378914091, "No": 0.00029460355111087265}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3803820639203286, "res": {"No": 0.619617721085681, "Yes": 0.3803820639203286}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6102574083024257, "res": {"Yes": 0.6102574083024257, "No": 0.38974242763805866}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986966855529451, "res": {"Yes": 0.9986966855529451, "No": 0.0013032749370359793}, "ground_truth": 1}, {"key": "36312304", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993849688092665, "res": {"Yes": 0.9993849688092665, "No": 0.0006150262550074315}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994046108445075, "res": {"Yes": 0.9994046108445075, "No": 0.0005953768449766584}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999210158834096, "res": {"Yes": 0.9999210158834096, "No": 7.895811784020398e-05}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999995276659155, "res": {"Yes": 0.999995276659155, "No": 4.647932783037392e-06}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999838547864799, "res": {"Yes": 0.999838547864799, "No": 0.00016140610137884985}, "ground_truth": 1}, {"key": "33773343", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999400871468467, "res": {"Yes": 0.9999400871468467, "No": 5.978558377902661e-05}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.6563451521371164e-06}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.006645040447021589, "res": {"No": 0.9933548798121379, "Yes": 0.006645040447021589}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9730587610459024, "res": {"Yes": 0.9730587610459024, "No": 0.026940878473840385}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981280874178611, "res": {"Yes": 0.9981280874178611, "No": 0.0018719019115047465}, "ground_truth": 1}, {"key": "34913320", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999817094909176, "res": {"Yes": 0.999817094909176, "No": 0.0001827870031890532}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.4583540768462853, "res": {"No": 0.5416452565699472, "Yes": 0.4583540768462853}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996119434534262, "res": {"Yes": 0.9996119434534262, "No": 0.00038797730977979564}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994334267632664, "res": {"Yes": 0.9994334267632664, "No": 0.0005665512578204093}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996352839250833, "res": {"Yes": 0.9996352839250833, "No": 0.00036462541194494835}, "ground_truth": 1}, {"key": "33784155", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999716747231683, "res": {"Yes": 0.9999716747231683, "No": 2.81935029051288e-05}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999758466493915, "res": {"Yes": 0.999758466493915, "No": 0.0002414309613665678}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998840663508249, "res": {"Yes": 0.9998840663508249, "No": 0.00011570195809613828}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991426247563727, "res": {"Yes": 0.9991426247563727, "No": 0.0008572422561710381}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997602541136392, "res": {"Yes": 0.9997602541136392, "No": 0.00023960375976632433}, "ground_truth": 1}, {"key": "24085062", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983303803682299, "res": {"Yes": 0.9983303803682299, "No": 0.001669369280898065}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999614235510903, "res": {"Yes": 0.9999614235510903, "No": 3.849854921915093e-05}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 9.773263714412986e-05, "res": {"No": 0.9999021833683587, "Yes": 9.773263714412986e-05}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995285660599177, "res": {"Yes": 0.9995285660599177, "No": 0.00047131353783193134}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996560143266428, "res": {"Yes": 0.9996560143266428, "No": 0.00034395380410537146}, "ground_truth": 1}, {"key": "33893487", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999410407211666, "res": {"Yes": 0.9999410407211666, "No": 5.8942568699761944e-05}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0006470683678727322, "res": {"No": 0.999352822449789, "Yes": 0.0006470683678727322}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.392142768555372, "res": {"No": 0.607856320445206, "Yes": 0.392142768555372}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 1.8869024287300736e-06, "res": {"No": 0.9999980183344636, "Yes": 1.8869024287300736e-06}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897045804208722, "res": {"Yes": 0.9897045804208722, "No": 0.010295293808074452}, "ground_truth": 1}, {"key": "40913011", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.878481702225914, "res": {"Yes": 0.878481702225914, "No": 0.12151781517445104}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9167417053116709, "res": {"Yes": 0.9167417053116709, "No": 0.08325780506347896}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9836892594942064, "res": {"Yes": 0.9836892594942064, "No": 0.016310723565965107}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9744690153242377, "res": {"Yes": 0.9744690153242377, "No": 0.02553093052747255}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8930439564221397, "res": {"Yes": 0.8930439564221397, "No": 0.10695592984175957}, "ground_truth": 1}, {"key": "29642545", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986803943462687, "res": {"Yes": 0.9986803943462687, "No": 0.0013194912939952924}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998149533032524, "res": {"Yes": 0.9998149533032524, "No": 0.00018491579475329474}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9935300707099014, "res": {"Yes": 0.9935300707099014, "No": 0.006469875586132763}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999120763555102, "res": {"Yes": 0.9999120763555102, "No": 8.78099269941263e-05}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998768031293578, "res": {"Yes": 0.9998768031293578, "No": 0.00012311483749378782}, "ground_truth": 1}, {"key": "35969159", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999590395729012, "res": {"Yes": 0.9999590395729012, "No": 4.087908479161844e-05}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999759659438225, "res": {"Yes": 0.9999759659438225, "No": 2.3990683271623087e-05}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.006756697795960736, "res": {"No": 0.9932423168311799, "Yes": 0.006756697795960736}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999620161656856, "res": {"Yes": 0.999620161656856, "No": 0.0003796730767034257}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.948277322883546e-06}, "ground_truth": 1}, {"key": "37081669", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999691715222073, "res": {"Yes": 0.9999691715222073, "No": 3.0745743378778454e-05}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.6939662712667613e-06}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.07934033036419273, "res": {"No": 0.9206593876110912, "Yes": 0.07934033036419273}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995292809529897, "res": {"Yes": 0.9995292809529897, "No": 0.0004706115212154068}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999869324773808, "res": {"Yes": 0.9999869324773808, "No": 1.299482963498807e-05}, "ground_truth": 1}, {"key": "40048022", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999249493064443, "res": {"Yes": 0.9999249493064443, "No": 7.499395857543572e-05}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994745075795313, "res": {"Yes": 0.9994745075795313, "No": 0.0005253870126470428}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9831050183541703, "res": {"Yes": 0.9831050183541703, "No": 0.016894879350125142}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999522452652937, "res": {"Yes": 0.9999522452652937, "No": 4.763818082878672e-05}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999987886094374, "res": {"Yes": 0.999987886094374, "No": 1.2077866443163344e-05}, "ground_truth": 1}, {"key": "32884004", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999682179220609, "res": {"Yes": 0.9999682179220609, "No": 3.166717191337231e-05}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999810915907662, "res": {"Yes": 0.9999810915907662, "No": 1.8850785007069616e-05}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9994341415583088, "res": {"Yes": 0.9994341415583088, "No": 0.0005658238767527586}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.014998145642081e-07}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997231292460176, "res": {"Yes": 0.997231292460176, "No": 0.0027687232101775874}, "ground_truth": 1}, {"key": "39022490", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9137003919568826, "res": {"Yes": 0.9137003919568826, "No": 0.08629938562323165}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.998472853876194, "res": {"Yes": 0.998472853876194, "No": 0.001527082082648659}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8339654536194263, "res": {"Yes": 0.8339654536194263, "No": 0.16603416475912158}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944741151806961, "res": {"Yes": 0.9944741151806961, "No": 0.005525898612539626}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996748385564448, "res": {"Yes": 0.9996748385564448, "No": 0.00032510901536075056}, "ground_truth": 1}, {"key": "35159385", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976976459921585, "res": {"Yes": 0.9976976459921585, "No": 0.0023023107188751783}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8834147262414742, "res": {"Yes": 0.8834147262414742, "No": 0.11658511172126497}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0006822468605491309, "res": {"No": 0.9993175836520222, "Yes": 0.0006822468605491309}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999908661547138, "res": {"Yes": 0.9999908661547138, "No": 9.030967982412833e-06}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999197047538358, "res": {"Yes": 0.9999197047538358, "No": 8.02026500997463e-05}, "ground_truth": 1}, {"key": "34363669", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999750123381969, "res": {"Yes": 0.9999750123381969, "No": 2.4912870512052018e-05}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998054188233733, "res": {"Yes": 0.9998054188233733, "No": 0.00019450892676401788}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4079004416166919, "res": {"No": 0.592098661526546, "Yes": 0.4079004416166919}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9841862070752154, "res": {"Yes": 0.9841862070752154, "No": 0.01581374152755475}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997235671645989, "res": {"Yes": 0.9997235671645989, "No": 0.00027630478722491547}, "ground_truth": 1}, {"key": "36119687", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9676936878056509, "res": {"Yes": 0.9676936878056509, "No": 0.032306261921652146}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993400833408417, "res": {"Yes": 0.9993400833408417, "No": 0.0006598753529147679}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7065447418415277, "res": {"Yes": 0.7065447418415277, "No": 0.2934550508983514}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999995276659155, "res": {"Yes": 0.999995276659155, "No": 4.660005171795386e-06}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.07189026550604e-07}, "ground_truth": 1}, {"key": "35217446", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999888397127765, "res": {"Yes": 0.9999888397127765, "No": 1.111156611278707e-05}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 4.559739788461817e-06}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9993255611362024, "res": {"Yes": 0.9993255611362024, "No": 0.0006743758240542993}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9975456717008574, "res": {"Yes": 0.9975456717008574, "No": 0.0024543252567827494}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999893165220688, "res": {"Yes": 0.9999893165220688, "No": 1.058297478976502e-05}, "ground_truth": 1}, {"key": "39049331", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985154023070703, "res": {"Yes": 0.9985154023070703, "No": 0.0014845894642481017}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.10815351780716975, "res": {"No": 0.891845462258967, "Yes": 0.10815351780716975}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997644252520895, "res": {"Yes": 0.9997644252520895, "No": 0.0002354843738000351}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6657763604191571, "res": {"Yes": 0.6657763604191571, "No": 0.33422341581500536}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5367481977163535, "res": {"Yes": 0.5367481977163535, "No": 0.4632516092483665}, "ground_truth": 1}, {"key": "36472242", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6960219582051853, "res": {"Yes": 0.6960219582051853, "No": 0.3039778627891307}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6257335734460215, "res": {"Yes": 0.6257335734460215, "No": 0.37426597920025695}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0060200542986065505, "res": {"No": 0.9939799015486054, "Yes": 0.0060200542986065505}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 7.152575902891134e-06}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998098285148898, "res": {"Yes": 0.9998098285148898, "No": 0.00019015348426256154}, "ground_truth": 1}, {"key": "31854721", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.7827473883302898e-07}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999897933310884, "res": {"Yes": 0.9999897933310884, "No": 1.0168630680374654e-05}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.001966007386781457, "res": {"No": 0.9980333358232714, "Yes": 0.001966007386781457}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999980257181892, "res": {"Yes": 0.999980257181892, "No": 1.9645721082051978e-05}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.3289904023065336e-06}, "ground_truth": 1}, {"key": "18725849", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999943230348141, "res": {"Yes": 0.9999943230348141, "No": 5.640256583278549e-06}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.993850007101584, "res": {"Yes": 0.993850007101584, "No": 0.006149564577971558}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9571023198029137, "res": {"Yes": 0.9571023198029137, "No": 0.042897582783868335}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9947239982463904, "res": {"Yes": 0.9947239982463904, "No": 0.00527604560661153}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999779923581718, "res": {"Yes": 0.9999779923581718, "No": 2.1966302135580172e-05}, "ground_truth": 1}, {"key": "36883179", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998783452545388, "res": {"Yes": 0.9998783452545388, "No": 0.00012160505725851737}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.948255709027441e-07}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997193962066108, "res": {"Yes": 0.9997193962066108, "No": 0.00028056615895785333}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999242341303785, "res": {"Yes": 0.9999242341303785, "No": 7.564932805925181e-05}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999106460363032, "res": {"Yes": 0.9999106460363032, "No": 8.928857658902021e-05}, "ground_truth": 1}, {"key": "34266359", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999303130782463, "res": {"Yes": 0.9999303130782463, "No": 6.958608073946116e-05}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998732274763156, "res": {"Yes": 0.9998732274763156, "No": 0.00012663014049628542}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9677993057227099, "res": {"Yes": 0.9677993057227099, "No": 0.032200651297689835}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949393152320752, "res": {"Yes": 0.9949393152320752, "No": 0.005060716018810369}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999298363015874, "res": {"Yes": 0.9999298363015874, "No": 7.007328044460625e-05}, "ground_truth": 1}, {"key": "31920289", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999826412106655, "res": {"Yes": 0.9999826412106655, "No": 1.7291889178674627e-05}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.6914328986280521, "res": {"Yes": 0.6914328986280521, "No": 0.30856683933354967}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.003342828413375855, "res": {"No": 0.9966570180486668, "Yes": 0.003342828413375855}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9908769270801646, "res": {"Yes": 0.9908769270801646, "No": 0.00912294691906273}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99963587970789, "res": {"Yes": 0.99963587970789, "No": 0.00036405026834992864}, "ground_truth": 1}, {"key": "36292997", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998445071102727, "res": {"Yes": 0.9998445071102727, "No": 0.00015536101814286294}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9925937928238917, "res": {"Yes": 0.9925937928238917, "No": 0.007406039966518084}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.065785120397471, "res": {"No": 0.9342147146198637, "Yes": 0.065785120397471}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996026565818134, "res": {"Yes": 0.9996026565818134, "No": 0.0003973394916532071}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.588360226338514e-06}, "ground_truth": 1}, {"key": "30412533", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999942038320978, "res": {"Yes": 0.9999942038320978, "No": 5.713732900488576e-06}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998479598092814, "res": {"Yes": 0.9998479598092814, "No": 0.00015191551712878215}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.012800609397630038, "res": {"No": 0.9871990383008451, "Yes": 0.012800609397630038}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999713171213617, "res": {"Yes": 0.9999713171213617, "No": 2.8630470648265652e-05}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999804955832136, "res": {"Yes": 0.9999804955832136, "No": 1.9398263620474018e-05}, "ground_truth": 1}, {"key": "40433191", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999870516788303, "res": {"Yes": 0.9999870516788303, "No": 1.2861521931581007e-05}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992106944656237, "res": {"Yes": 0.9992106944656237, "No": 0.0007892313980120024}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9328490883859633, "res": {"Yes": 0.9328490883859633, "No": 0.06715033279709026}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9934781744259684, "res": {"Yes": 0.9934781744259684, "No": 0.006521773048881883}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989465479705907, "res": {"Yes": 0.9989465479705907, "No": 0.001053379196519271}, "ground_truth": 1}, {"key": "34565591", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973946857374101, "res": {"Yes": 0.9973946857374101, "No": 0.002605273288672839}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.053996232681984446, "res": {"No": 0.9460026108976277, "Yes": 0.053996232681984446}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.497288042864636e-06, "res": {"No": 0.9999953958625991, "Yes": 4.497288042864636e-06}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 5.230812176867056e-06}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999757275415809, "res": {"Yes": 0.9999757275415809, "No": 2.41275311043515e-05}, "ground_truth": 1}, {"key": "36062480", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997053378914091, "res": {"Yes": 0.9997053378914091, "No": 0.0002945643714440805}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.5399591988303605, "res": {"Yes": 0.5399591988303605, "No": 0.4600404493478577}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.06323333906918363, "res": {"No": 0.9367665256413543, "Yes": 0.06323333906918363}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995382170545798, "res": {"Yes": 0.9995382170545798, "No": 0.00046165209116180977}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996556568497159, "res": {"Yes": 0.9996556568497159, "No": 0.00034423861252073207}, "ground_truth": 1}, {"key": "37276883", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999661915245194, "res": {"Yes": 0.9999661915245194, "No": 3.370649531029076e-05}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991383407417248, "res": {"Yes": 0.9991383407417248, "No": 0.000861585725234649}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.02676156399970618, "res": {"No": 0.9732382651006694, "Yes": 0.02676156399970618}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997656170119853, "res": {"Yes": 0.9997656170119853, "No": 0.00023427966655951526}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999102884643205, "res": {"Yes": 0.9999102884643205, "No": 8.957072690623971e-05}, "ground_truth": 1}, {"key": "38509260", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996118242996896, "res": {"Yes": 0.9996118242996896, "No": 0.00038791368671822545}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.20104731512361906, "res": {"No": 0.7989521039550953, "Yes": 0.20104731512361906}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9480923583999973, "res": {"Yes": 0.9480923583999973, "No": 0.05190698043863917}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.932364067693854, "res": {"Yes": 0.932364067693854, "No": 0.06763562755026702}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9857599878908024, "res": {"Yes": 0.9857599878908024, "No": 0.014239781745168496}, "ground_truth": 1}, {"key": "37139607", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9901926864640859, "res": {"Yes": 0.9901926864640859, "No": 0.00980714575832632}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 5.018406559905372e-05, "res": {"No": 0.999944378229948, "Yes": 5.018406559905372e-05}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.004244931907430049, "res": {"No": 0.9957546978803039, "Yes": 0.004244931907430049}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999586819786129, "res": {"Yes": 0.9999586819786129, "No": 4.125868177730992e-05}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909765399899693, "res": {"Yes": 0.9909765399899693, "No": 0.009023315864393807}, "ground_truth": 1}, {"key": "37092824", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.902952905773429, "res": {"Yes": 0.902952905773429, "No": 0.09704686236279994}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9006223992044134, "res": {"Yes": 0.9006223992044134, "No": 0.09937728624830797}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8181536613171747, "res": {"Yes": 0.8181536613171747, "No": 0.18184583705546112}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.00014382748462924674, "res": {"No": 0.999856064459714, "Yes": 0.00014382748462924674}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.83987584637759, "res": {"Yes": 0.83987584637759, "No": 0.16012381808414586}, "ground_truth": 1}, {"key": "32191802", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.030659353250630895, "res": {"No": 0.9693404453550242, "Yes": 0.030659353250630895}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.957995737878284, "res": {"Yes": 0.957995737878284, "No": 0.042004135552939664}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0004906311178510596, "res": {"No": 0.9995092827111322, "Yes": 0.0004906311178510596}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993946074640537, "res": {"Yes": 0.9993946074640537, "No": 0.0006053514860213751}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999448788113121, "res": {"Yes": 0.999448788113121, "No": 0.0005511435810991893}, "ground_truth": 1}, {"key": "39396038", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999464045822857, "res": {"Yes": 0.9999464045822857, "No": 5.3574465142263064e-05}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999987886094374, "res": {"Yes": 0.999987886094374, "No": 1.2096178278537545e-05}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7631279259162729, "res": {"Yes": 0.7631279259162729, "No": 0.23687180295415267}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.3544796145558304e-07}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.4702442713383898e-06}, "ground_truth": 1}, {"key": "39076884", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999995276659155, "res": {"Yes": 0.999995276659155, "No": 4.6540022948143145e-06}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997820633916336, "res": {"Yes": 0.9997820633916336, "No": 0.0002178469077285161}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9996466042090082, "res": {"Yes": 0.9996466042090082, "No": 0.0003532939245638263}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999613254165458, "res": {"Yes": 0.999613254165458, "No": 0.0003866201003165859}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999639692856215, "res": {"Yes": 0.999639692856215, "No": 0.00036028279255865994}, "ground_truth": 1}, {"key": "27763432", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969856069336307, "res": {"Yes": 0.9969856069336307, "No": 0.0030143923843758934}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.994377216355049, "res": {"Yes": 0.994377216355049, "No": 0.005622744438613781}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9995139147077535, "res": {"Yes": 0.9995139147077535, "No": 0.00048602451157310137}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9526624931959818, "res": {"Yes": 0.9526624931959818, "No": 0.04733682240424021}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9544682307754008, "res": {"Yes": 0.9544682307754008, "No": 0.04553144467322122}, "ground_truth": 1}, {"key": "37806929", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9770012231455149, "res": {"Yes": 0.9770012231455149, "No": 0.02299861689180708}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9888386868435817, "res": {"Yes": 0.9888386868435817, "No": 0.011161186616923967}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6606521273769658, "res": {"Yes": 0.6606521273769658, "No": 0.3393475756353747}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998037502994923, "res": {"Yes": 0.9998037502994923, "No": 0.0001961253622257697}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999735819309633, "res": {"Yes": 0.9999735819309633, "No": 2.639501213348588e-05}, "ground_truth": 1}, {"key": "32334186", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999673835219289, "res": {"Yes": 0.9999673835219289, "No": 3.259652077435943e-05}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965020440520815, "res": {"Yes": 0.9965020440520815, "No": 0.0034979937119509227}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.562756640036326, "res": {"Yes": 0.562756640036326, "No": 0.4372432948591394}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988001310191157, "res": {"Yes": 0.9988001310191157, "No": 0.0011998048025034006}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997677621813781, "res": {"Yes": 0.9997677621813781, "No": 0.00023220591855167984}, "ground_truth": 1}, {"key": "36187324", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998940783500941, "res": {"Yes": 0.9998940783500941, "No": 0.00010590130314531494}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999548676244963, "res": {"Yes": 0.9999548676244963, "No": 4.4829148228988166e-05}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.95190403199505, "res": {"Yes": 0.95190403199505, "No": 0.0480958780965895}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999157613307053, "res": {"Yes": 0.999157613307053, "No": 0.0008423855926777376}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980339270583947, "res": {"Yes": 0.9980339270583947, "No": 0.0019661058862189256}, "ground_truth": 1}, {"key": "35306009", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999630647657377, "res": {"Yes": 0.999630647657377, "No": 0.00036925835864172083}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9879568552135263, "res": {"Yes": 0.9879568552135263, "No": 0.012043104783031637}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999944422379444, "res": {"Yes": 0.9999944422379444, "No": 5.4885415141778054e-06}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999987886094374, "res": {"Yes": 0.999987886094374, "No": 1.2018577263070848e-05}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999844292352256, "res": {"Yes": 0.9999844292352256, "No": 1.542523119429837e-05}, "ground_truth": 1}, {"key": "39490050", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983221837101238, "res": {"Yes": 0.9983221837101238, "No": 0.0016777242784876418}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.911169229127617e-07}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.3463962221008988e-06}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946961602901138, "res": {"Yes": 0.9946961602901138, "No": 0.005303825109310274}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999304322739465, "res": {"Yes": 0.9999304322739465, "No": 6.942066279274058e-05}, "ground_truth": 1}, {"key": "38072149", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996738852169895, "res": {"Yes": 0.9996738852169895, "No": 0.0003260450486830718}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994953501385975, "res": {"Yes": 0.9994953501385975, "No": 0.0005045440034859966}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.330038257309569, "res": {"No": 0.6699614872270072, "Yes": 0.330038257309569}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9892635129669414, "res": {"Yes": 0.9892635129669414, "No": 0.010736375753185584}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994965415777448, "res": {"Yes": 0.9994965415777448, "No": 0.0005033509084412831}, "ground_truth": 1}, {"key": "35899689", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995357149536742, "res": {"Yes": 0.9995357149536742, "No": 0.0004642524873334932}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.5731528178536909, "res": {"Yes": 0.5731528178536909, "No": 0.426846762579474}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998510586328669, "res": {"Yes": 0.9998510586328669, "No": 0.00014889354060115579}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998672905369939, "res": {"Yes": 0.998672905369939, "No": 0.001327037851399579}, "ground_truth": 1}, {"key": "27994518", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999696483206215, "res": {"Yes": 0.9999696483206215, "No": 3.0221075884987978e-05}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999871708812939, "res": {"Yes": 0.9999871708812939, "No": 1.2756622620457356e-05}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.01663835843829883, "res": {"No": 0.9833610856693906, "Yes": 0.01663835843829883}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.7276386915207654e-06}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986360533219428, "res": {"Yes": 0.9986360533219428, "No": 0.0013638632243390988}, "ground_truth": 1}, {"key": "10615479", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996533927421792, "res": {"Yes": 0.9996533927421792, "No": 0.00034656731046629893}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9004674298337257, "res": {"Yes": 0.9004674298337257, "No": 0.0995320277178911}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999955150656573, "res": {"Yes": 0.9999955150656573, "No": 4.459637826723963e-06}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 5.381066334454939e-06}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999634499379698, "res": {"Yes": 0.9999634499379698, "No": 3.651070692429738e-05}, "ground_truth": 1}, {"key": "40186667", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999059975285092, "res": {"Yes": 0.9999059975285092, "No": 9.388320952868032e-05}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.7749539273318444, "res": {"Yes": 0.7749539273318444, "No": 0.22504576442853957}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0006138456781044928, "res": {"No": 0.9993859219231658, "Yes": 0.0006138456781044928}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997484668397711, "res": {"Yes": 0.9997484668397711, "No": 0.00025146091199659034}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 5.5928262176354146e-08}, "ground_truth": 1}, {"key": "38622886", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.1921891380241054e-07}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.1491501290604893e-07}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.7493061148418652e-06, "res": {"No": 0.9999968263007362, "Yes": 2.7493061148418652e-06}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0025526590459345787, "res": {"No": 0.9974470990083872, "Yes": 0.0025526590459345787}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.1243302870157375e-06}, "ground_truth": 1}, {"key": "40686943", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.4921469252724512e-06}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997172511609895, "res": {"Yes": 0.9997172511609895, "No": 0.00028270206181345753}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9365008907036948, "res": {"Yes": 0.9365008907036948, "No": 0.0634989124501149}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 4.5771542625978255e-05, "res": {"No": 0.9999541524340317, "Yes": 4.5771542625978255e-05}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.002386763535503204, "res": {"No": 0.9976131696203185, "Yes": 0.002386763535503204}, "ground_truth": 1}, {"key": "30604567", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.006165185830486011, "res": {"No": 0.9938348152264873, "Yes": 0.006165185830486011}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.28995718640113227, "res": {"No": 0.7100426786348819, "Yes": 0.28995718640113227}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 8.00418522465043e-06}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999853828508316, "res": {"Yes": 0.9999853828508316, "No": 1.4600431133415246e-05}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999268564151225, "res": {"Yes": 0.9999268564151225, "No": 7.307406807805337e-05}, "ground_truth": 1}, {"key": "35440903", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.828494160094459e-07}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999939654258081, "res": {"Yes": 0.9999939654258081, "No": 6.0029712678772526e-06}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998029160335971, "res": {"Yes": 0.9998029160335971, "No": 0.00019685364974592997}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999599931652947, "res": {"Yes": 0.9999599931652947, "No": 3.990643375798795e-05}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995249953204606, "res": {"Yes": 0.9995249953204606, "No": 0.00047487625673768607}, "ground_truth": 1}, {"key": "37219533", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999734627301196, "res": {"Yes": 0.9999734627301196, "No": 2.641345549005353e-05}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991955786504705, "res": {"Yes": 0.9991955786504705, "No": 0.0008044015284143633}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999677411203288, "res": {"Yes": 0.9999677411203288, "No": 3.2225515898634526e-05}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.997688744773008, "res": {"Yes": 0.997688744773008, "No": 0.0023112217457491695}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993242508613587, "res": {"Yes": 0.9993242508613587, "No": 0.0006756674775890324}, "ground_truth": 1}, {"key": "40178965", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 4.125959331519005e-06}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986411646539352, "res": {"Yes": 0.9986411646539352, "No": 0.0013587338575057222}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998924096782539, "res": {"Yes": 0.9998924096782539, "No": 0.00010752824507760509}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997043845528594, "res": {"Yes": 0.9997043845528594, "No": 0.00029551617053283553}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999789459686392, "res": {"Yes": 0.9999789459686392, "No": 2.0971210277591895e-05}, "ground_truth": 1}, {"key": "13750468", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 2.031451491444255e-06}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951012529919205, "res": {"Yes": 0.9951012529919205, "No": 0.004898706932585873}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9794760481851449, "res": {"Yes": 0.9794760481851449, "No": 0.020523877798465037}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.4123255203015333e-07}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999561788061766, "res": {"Yes": 0.9999561788061766, "No": 4.3689576864439634e-05}, "ground_truth": 1}, {"key": "17754949", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999995276659155, "res": {"Yes": 0.999995276659155, "No": 4.6057317385542945e-06}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.4758964353496423e-06}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.987211703158546, "res": {"Yes": 0.987211703158546, "No": 0.012788083733342852}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8257609038622958, "res": {"Yes": 0.8257609038622958, "No": 0.1742389405887348}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.05290568388756933, "res": {"No": 0.9470941741691202, "Yes": 0.05290568388756933}, "ground_truth": 1}, {"key": "36675623", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3009340624074694, "res": {"No": 0.6990656928338316, "Yes": 0.3009340624074694}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.06677356568747725, "res": {"No": 0.9332263341882516, "Yes": 0.06677356568747725}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.920712649297244, "res": {"Yes": 0.920712649297244, "No": 0.07928713464579575}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 5.637657024238048e-06, "res": {"No": 0.9999943230348141, "Yes": 5.637657024238048e-06}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9655681156408025, "res": {"Yes": 0.9655681156408025, "No": 0.0344317870195538}, "ground_truth": 1}, {"key": "40035440", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965389781914331, "res": {"Yes": 0.9965389781914331, "No": 0.0034610647708984976}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.18319025891813887, "res": {"No": 0.8168095501889882, "Yes": 0.18319025891813887}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0039270400689022785, "res": {"No": 0.9960727638417393, "Yes": 0.0039270400689022785}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0001385663895854054, "res": {"No": 0.9998610703116231, "Yes": 0.0001385663895854054}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9700572030567967, "res": {"Yes": 0.9700572030567967, "No": 0.029942703703790575}, "ground_truth": 1}, {"key": "37685909", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9949792055367209, "res": {"Yes": 0.9949792055367209, "No": 0.005020729686325013}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.8674089234935481, "res": {"Yes": 0.8674089234935481, "No": 0.13259083781969905}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7856067184342694, "res": {"Yes": 0.7856067184342694, "No": 0.2143931285441255}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999700059218314, "res": {"Yes": 0.9999700059218314, "No": 2.9932880485064108e-05}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999642843338196, "res": {"Yes": 0.9999642843338196, "No": 3.5616163223912535e-05}, "ground_truth": 1}, {"key": "36938787", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997881377959671, "res": {"Yes": 0.9997881377959671, "No": 0.00021175004752350482}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 5.303404951529436e-06}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.3900831395284223e-06}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999316242277296, "res": {"Yes": 0.9999316242277296, "No": 6.834863265786845e-05}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999303130782463, "res": {"Yes": 0.9999303130782463, "No": 6.960041694867236e-05}, "ground_truth": 1}, {"key": "39398068", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999813299942867, "res": {"Yes": 0.9999813299942867, "No": 1.8638280515208864e-05}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999646419301113, "res": {"Yes": 0.9999646419301113, "No": 3.528456294545938e-05}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9984350606965434, "res": {"Yes": 0.9984350606965434, "No": 0.0015648536089065668}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.5193870831253805e-07}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.1966746175046087e-06}, "ground_truth": 1}, {"key": "39926408", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 1.52443026563865e-08}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.807682174847176e-07}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9979476084417679, "res": {"Yes": 0.9979476084417679, "No": 0.0020523440500737344}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.998061941864694, "res": {"Yes": 0.998061941864694, "No": 0.0019380726095254523}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979679104952427, "res": {"Yes": 0.9979679104952427, "No": 0.0020320533392083656}, "ground_truth": 1}, {"key": "40465336", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996143265911169, "res": {"Yes": 0.9996143265911169, "No": 0.0003855807186419559}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.960404522183738, "res": {"Yes": 0.960404522183738, "No": 0.039595286325141206}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8469530642280806, "res": {"Yes": 0.8469530642280806, "No": 0.15304654318909763}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990390997605908, "res": {"Yes": 0.9990390997605908, "No": 0.0009607834620826511}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999238765575363, "res": {"Yes": 0.9999238765575363, "No": 7.599256890684432e-05}, "ground_truth": 1}, {"key": "34173549", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999789459686392, "res": {"Yes": 0.9999789459686392, "No": 2.0910983724130476e-05}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995085677923832, "res": {"Yes": 0.9995085677923832, "No": 0.0004912860565092333}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5012529074395629, "res": {"Yes": 0.5012529074395629, "No": 0.4987466637617178}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996069388687747, "res": {"Yes": 0.9996069388687747, "No": 0.00039295478641638974}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979099793411553, "res": {"Yes": 0.9979099793411553, "No": 0.0020900077601957293}, "ground_truth": 1}, {"key": "33541535", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998987268084759, "res": {"Yes": 0.9998987268084759, "No": 0.0001011744806905299}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999555828164723, "res": {"Yes": 0.9999555828164723, "No": 4.4355892253787156e-05}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.531977384969432, "res": {"Yes": 0.531977384969432, "No": 0.4680218013819651}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995955072092758, "res": {"Yes": 0.9995955072092758, "No": 0.0004042933619811788}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975970297664529, "res": {"Yes": 0.9975970297664529, "No": 0.002402739822599384}, "ground_truth": 1}, {"key": "35685195", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998832721200517, "res": {"Yes": 0.998832721200517, "No": 0.0011671999541940517}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.995359837970761, "res": {"Yes": 0.995359837970761, "No": 0.004639826526374905}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00037416113914930603, "res": {"No": 0.9996257621544586, "Yes": 0.00037416113914930603}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999719131244437, "res": {"Yes": 0.9999719131244437, "No": 2.7983612135717728e-05}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 9.788719497081965e-07}, "ground_truth": 1}, {"key": "28440730", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996209957902479, "res": {"Yes": 0.9996209957902479, "No": 0.00037894844164948244}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999790998093942, "res": {"Yes": 0.999790998093942, "No": 0.00020889454556723616}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.44688381493488527, "res": {"No": 0.5531155434243697, "Yes": 0.44688381493488527}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.6237761368679484, "res": {"Yes": 0.6237761368679484, "No": 0.3762236983801386}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8191492723623737, "res": {"Yes": 0.8191492723623737, "No": 0.1808502026577354}, "ground_truth": 1}, {"key": "38338714", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.23868115433761952, "res": {"No": 0.7613186906544813, "Yes": 0.23868115433761952}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.24714390516564103, "res": {"No": 0.7528558829533061, "Yes": 0.24714390516564103}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.2738060417724475, "res": {"No": 0.7261897751773638, "Yes": 0.2738060417724475}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954408628624599, "res": {"Yes": 0.9954408628624599, "No": 0.004559020870013817}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994631965605485, "res": {"Yes": 0.9994631965605485, "No": 0.0005365817666271648}, "ground_truth": 1}, {"key": "32191881", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.816513469561826, "res": {"Yes": 0.816513469561826, "No": 0.18348537774057924}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968267392683872, "res": {"Yes": 0.9968267392683872, "No": 0.003172862822692519}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 2.5647129395753495e-06, "res": {"No": 0.9999973031140366, "Yes": 2.5647129395753495e-06}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7429376012187733, "res": {"Yes": 0.7429376012187733, "No": 0.25706204576877734}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999288827389523, "res": {"Yes": 0.9999288827389523, "No": 7.10084978540354e-05}, "ground_truth": 1}, {"key": "37707251", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.039428226915848394, "res": {"No": 0.9605715798700319, "Yes": 0.039428226915848394}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989191859007542, "res": {"Yes": 0.9989191859007542, "No": 0.0010807511179462465}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6909586001234416, "res": {"Yes": 0.6909586001234416, "No": 0.30904054410375426}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9411936865729235, "res": {"Yes": 0.9411936865729235, "No": 0.0588051995656403}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 4.708590710080773e-07}, "ground_truth": 1}, {"key": "40172567", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994834394633573, "res": {"Yes": 0.9994834394633573, "No": 0.0005163904461677368}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993788968644571, "res": {"Yes": 0.9993788968644571, "No": 0.000621065069323917}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.24515551860554607, "res": {"No": 0.7548441687824139, "Yes": 0.24515551860554607}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.996692075079164, "res": {"Yes": 0.996692075079164, "No": 0.0033078819125306152}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998706053222135, "res": {"Yes": 0.9998706053222135, "No": 0.00012931012098980053}, "ground_truth": 1}, {"key": "33113255", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999068608614469, "res": {"Yes": 0.999068608614469, "No": 0.0009313643535974041}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9857107642090743, "res": {"Yes": 0.9857107642090743, "No": 0.014289132438143155}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998449838462458, "res": {"Yes": 0.9998449838462458, "No": 0.0001548893603682903}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9706094207000159, "res": {"Yes": 0.9706094207000159, "No": 0.029390476079278926}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999874092852638, "res": {"Yes": 0.9999874092852638, "No": 1.249396287904299e-05}, "ground_truth": 1}, {"key": "33022143", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999812107925193, "res": {"Yes": 0.9999812107925193, "No": 1.867565040453096e-05}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.6330959317303989e-06}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8098868195737047, "res": {"Yes": 0.8098868195737047, "No": 0.1901129898794128}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912585287844808, "res": {"Yes": 0.9912585287844808, "No": 0.008741244571215138}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994081811938751, "res": {"Yes": 0.9994081811938751, "No": 0.0005917042059851867}, "ground_truth": 1}, {"key": "32084473", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995057083024119, "res": {"Yes": 0.9995057083024119, "No": 0.000494269817802575}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.935128477539561, "res": {"Yes": 0.935128477539561, "No": 0.06487011104178284}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9777021054503862, "res": {"Yes": 0.9777021054503862, "No": 0.022297141334751847}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9749457125210876, "res": {"Yes": 0.9749457125210876, "No": 0.025053378256630423}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9935108898035102, "res": {"Yes": 0.9935108898035102, "No": 0.006489031428529523}, "ground_truth": 1}, {"key": "40564245", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9664417008679389, "res": {"Yes": 0.9664417008679389, "No": 0.0335567749511482}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.899692320696563, "res": {"Yes": 0.899692320696563, "No": 0.10030625731180508}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5100357794195273, "res": {"Yes": 0.5100357794195273, "No": 0.4899636374092469}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994093725491017, "res": {"Yes": 0.9994093725491017, "No": 0.0005905347946590901}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999812212134349, "res": {"Yes": 0.999812212134349, "No": 0.00018773559399204833}, "ground_truth": 1}, {"key": "31717213", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999167249015635, "res": {"Yes": 0.9999167249015635, "No": 8.320351195752724e-05}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999901509395023, "res": {"Yes": 0.9999901509395023, "No": 9.770098598454142e-06}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9674722632157524, "res": {"Yes": 0.9674722632157524, "No": 0.03252749979138954}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993731823323252, "res": {"Yes": 0.9993731823323252, "No": 0.0006267210579335537}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971870779998868, "res": {"Yes": 0.9971870779998868, "No": 0.0028129177194749157}, "ground_truth": 1}, {"key": "34861894", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.14409939131952235, "res": {"No": 0.8559001729727626, "Yes": 0.14409939131952235}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9607172289317706, "res": {"Yes": 0.9607172289317706, "No": 0.039282564890814844}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9991922436109776, "res": {"Yes": 0.9991922436109776, "No": 0.0008076489998104996}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997285687141387, "res": {"Yes": 0.9997285687141387, "No": 0.000271382850928761}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 7.739240513871482e-06}, "ground_truth": 1}, {"key": "40838760", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999087389545426, "res": {"Yes": 0.9999087389545426, "No": 9.113633370553909e-05}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998775182714994, "res": {"Yes": 0.9998775182714994, "No": 0.00012239757502127436}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.08111535513196293, "res": {"No": 0.9188843572162991, "Yes": 0.08111535513196293}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999872900832717, "res": {"Yes": 0.9999872900832717, "No": 1.2618115985386626e-05}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999759659438225, "res": {"Yes": 0.9999759659438225, "No": 2.3941409098024482e-05}, "ground_truth": 1}, {"key": "40044849", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999951574563252, "res": {"Yes": 0.9999951574563252, "No": 4.785241806798588e-06}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987928755615565, "res": {"Yes": 0.9987928755615565, "No": 0.0012070631722512625}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9953624334744585, "res": {"Yes": 0.9953624334744585, "No": 0.0046375568145858225}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981896077406215, "res": {"Yes": 0.9981896077406215, "No": 0.0018103765801742952}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.2100465042697746e-06}, "ground_truth": 1}, {"key": "30296116", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999900317366834, "res": {"Yes": 0.9999900317366834, "No": 9.871053229792569e-06}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997601349322264, "res": {"Yes": 0.9997601349322264, "No": 0.00023973054368663107}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.18020854777490866, "res": {"No": 0.8197909747992552, "Yes": 0.18020854777490866}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991937920603973, "res": {"Yes": 0.9991937920603973, "No": 0.000806089886470635}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993278245611954, "res": {"Yes": 0.9993278245611954, "No": 0.0006720917459827567}, "ground_truth": 1}, {"key": "34931360", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978912215041996, "res": {"Yes": 0.9978912215041996, "No": 0.002108708297991862}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999828796125555, "res": {"Yes": 0.9999828796125555, "No": 1.701367713537107e-05}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.040044369584796215, "res": {"No": 0.9599554517730938, "Yes": 0.040044369584796215}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999074278310677, "res": {"Yes": 0.9999074278310677, "No": 9.253228012490258e-05}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.8031409545898653e-06}, "ground_truth": 1}, {"key": "18862422", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999918197754583, "res": {"Yes": 0.9999918197754583, "No": 8.16226380810739e-06}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.501466268374048e-07}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.954513740905597, "res": {"Yes": 0.954513740905597, "No": 0.04548604515306103}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9882583139898513, "res": {"Yes": 0.9882583139898513, "No": 0.011741442518810293}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993227022697264, "res": {"Yes": 0.9993227022697264, "No": 0.0006770933546161584}, "ground_truth": 1}, {"key": "36361140", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989825926102119, "res": {"Yes": 0.9989825926102119, "No": 0.0010173561770244566}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9860822273286661, "res": {"Yes": 0.9860822273286661, "No": 0.013917593775131184}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9940155917334932, "res": {"Yes": 0.9940155917334932, "No": 0.0059844370772083036}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998378327705268, "res": {"Yes": 0.9998378327705268, "No": 0.00016210877458446918}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996822231417649, "res": {"Yes": 0.9996822231417649, "No": 0.0003177584674813642}, "ground_truth": 1}, {"key": "39703329", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997158211064733, "res": {"Yes": 0.9997158211064733, "No": 0.00028403786534811543}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997152252560272, "res": {"Yes": 0.9997152252560272, "No": 0.00028467339992250233}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9900832979990914, "res": {"Yes": 0.9900832979990914, "No": 0.009916661070282773}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.00033165443153448904, "res": {"No": 0.9996680461788657, "Yes": 0.00033165443153448904}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9191624445050438, "res": {"Yes": 0.9191624445050438, "No": 0.08083744440074857}, "ground_truth": 1}, {"key": "34033324", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979789518736906, "res": {"Yes": 0.9979789518736906, "No": 0.0020210441402551023}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.3645573503972131, "res": {"No": 0.6354423362993845, "Yes": 0.3645573503972131}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5621275823470592, "res": {"Yes": 0.5621275823470592, "No": 0.4378722402859585}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9291511198703057, "res": {"Yes": 0.9291511198703057, "No": 0.07084856266967968}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998471291759354, "res": {"Yes": 0.9998471291759354, "No": 0.00015279063077948488}, "ground_truth": 1}, {"key": "35658862", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999657147257535, "res": {"Yes": 0.9999657147257535, "No": 3.415245793045626e-05}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995170125060299, "res": {"Yes": 0.9995170125060299, "No": 0.0004827813009102273}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9180356084065829, "res": {"Yes": 0.9180356084065829, "No": 0.08196411905341537}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.997627998152915, "res": {"Yes": 0.997627998152915, "No": 0.0023718145117451574}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970228153301867, "res": {"Yes": 0.9970228153301867, "No": 0.0029771345543563787}, "ground_truth": 1}, {"key": "36092657", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9963499550681469, "res": {"Yes": 0.9963499550681469, "No": 0.0036496938194079515}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.999318655720501, "res": {"Yes": 0.999318655720501, "No": 0.0006809164136159962}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.4642809102133429, "res": {"No": 0.5357188977375279, "Yes": 0.4642809102133429}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.985026603052141, "res": {"Yes": 0.985026603052141, "No": 0.01497339379248132}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998191209905809, "res": {"Yes": 0.9998191209905809, "No": 0.0001807922456554137}, "ground_truth": 1}, {"key": "26333438", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998828744557322, "res": {"Yes": 0.9998828744557322, "No": 0.00011706701609613602}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.98928684403275, "res": {"Yes": 0.98928684403275, "No": 0.010713024872419357}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.305094382324596, "res": {"No": 0.6949053220245031, "Yes": 0.305094382324596}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999517684731775, "res": {"Yes": 0.9999517684731775, "No": 4.815835088360777e-05}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.7929486552060855e-06}, "ground_truth": 1}, {"key": "34184963", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.0885011403280517e-07}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999922965856715, "res": {"Yes": 0.9999922965856715, "No": 7.590272991074865e-06}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997385791563852, "res": {"Yes": 0.9997385791563852, "No": 0.0002613996750209657}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999924157887603, "res": {"Yes": 0.9999924157887603, "No": 7.474106332614325e-06}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999714363229496, "res": {"Yes": 0.9999714363229496, "No": 2.831343870860012e-05}, "ground_truth": 1}, {"key": "35069975", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999493845180067, "res": {"Yes": 0.9999493845180067, "No": 5.05767790023847e-05}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999824028078323, "res": {"Yes": 0.9999824028078323, "No": 1.7326628849855147e-05}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.010073534910865805, "res": {"No": 0.9899263753027769, "Yes": 0.010073534910865805}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.0005340081270100205, "res": {"No": 0.9994656984899161, "Yes": 0.0005340081270100205}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999913429644723, "res": {"Yes": 0.9999913429644723, "No": 8.422776781927118e-06}, "ground_truth": 1}, {"key": "36443950", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989735533571187, "res": {"Yes": 0.9989735533571187, "No": 0.001026347392870156}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.6178833706286789, "res": {"Yes": 0.6178833706286789, "No": 0.38211598148684567}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924297428369376, "res": {"Yes": 0.9924297428369376, "No": 0.007570217349423342}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994791540873492, "res": {"Yes": 0.9994791540873492, "No": 0.0005207520714555474}, "ground_truth": 1}, {"key": "29460858", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8724582125308119, "res": {"Yes": 0.8724582125308119, "No": 0.1275413057733221}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.08183616648639132, "res": {"No": 0.9181632334068777, "Yes": 0.08183616648639132}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9992844865619103, "res": {"Yes": 0.9992844865619103, "No": 0.0007154506028466732}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.455485638969491e-06}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998246033937837, "res": {"Yes": 0.9998246033937837, "No": 0.0001752549126855439}, "ground_truth": 1}, {"key": "36155704", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997270194659681, "res": {"Yes": 0.9997270194659681, "No": 0.00027294834312660874}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 3.0692540694766493e-06}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.7523674380028004, "res": {"Yes": 0.7523674380028004, "No": 0.2476317042809477}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9104057224373927, "res": {"Yes": 0.9104057224373927, "No": 0.08959420835370313}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999789459686392, "res": {"Yes": 0.9999789459686392, "No": 2.096336851144977e-05}, "ground_truth": 1}, {"key": "37185211", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986165562418314, "res": {"Yes": 0.9986165562418314, "No": 0.0013833806903578271}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999572515937392, "res": {"Yes": 0.9999572515937392, "No": 4.265495980210118e-05}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00036645420378915254, "res": {"No": 0.9996333883484807, "Yes": 0.00036645420378915254}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992856776897424, "res": {"Yes": 0.9992856776897424, "No": 0.0007141941598704421}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999061167173112, "res": {"Yes": 0.9999061167173112, "No": 9.377408682315138e-05}, "ground_truth": 1}, {"key": "36454885", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984710722028232, "res": {"Yes": 0.9984710722028232, "No": 0.0015288629344057565}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968754206902385, "res": {"Yes": 0.9968754206902385, "No": 0.0031246109485753093}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9976245639242053, "res": {"Yes": 0.9976245639242053, "No": 0.0023754278745897173}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9887547676653899, "res": {"Yes": 0.9887547676653899, "No": 0.011245151736228023}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.977220081274035, "res": {"Yes": 0.977220081274035, "No": 0.022779779512606495}, "ground_truth": 1}, {"key": "33148906", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8011930898324071, "res": {"Yes": 0.8011930898324071, "No": 0.19880681592706162}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957646285914253, "res": {"Yes": 0.9957646285914253, "No": 0.004235334370168073}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.005743342046610722, "res": {"No": 0.994255818859522, "Yes": 0.005743342046610722}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.30346779079370045, "res": {"No": 0.6965312218431919, "Yes": 0.30346779079370045}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966917190608184, "res": {"Yes": 0.9966917190608184, "No": 0.0033083109726386186}, "ground_truth": 1}, {"key": "18086604", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9325870225956361, "res": {"Yes": 0.9325870225956361, "No": 0.06741247426992046}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9875822248523382, "res": {"Yes": 0.9875822248523382, "No": 0.012417133825527622}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.5158388646910197, "res": {"Yes": 0.5158388646910197, "No": 0.48416079808135754}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986852715137515, "res": {"Yes": 0.9986852715137515, "No": 0.0013146744638592818}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0005894303476084618, "res": {"No": 0.9994103256263102, "Yes": 0.0005894303476084618}, "ground_truth": 1}, {"key": "33693397", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9663742339949439, "res": {"Yes": 0.9663742339949439, "No": 0.03362568733767193}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.001548507368633781, "res": {"No": 0.9984514631224497, "Yes": 0.001548507368633781}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.15058225488615193, "res": {"No": 0.8494176091813878, "Yes": 0.15058225488615193}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.703994362970731, "res": {"Yes": 0.703994362970731, "No": 0.29600498014068516}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9958556546005951, "res": {"Yes": 0.9958556546005951, "No": 0.004144378671259508}, "ground_truth": 1}, {"key": "39501530", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9800721574275154, "res": {"Yes": 0.9800721574275154, "No": 0.01992785454650238}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998866885302296, "res": {"Yes": 0.9998866885302296, "No": 0.00011320672339615236}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998524888808352, "res": {"Yes": 0.9998524888808352, "No": 0.00014739669506506355}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999242341303785, "res": {"Yes": 0.9999242341303785, "No": 7.574114637073536e-05}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.0551833654764139e-06}, "ground_truth": 1}, {"key": "30948874", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.7298933922166114e-06}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 1.100066968014368e-06}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9997283303588818, "res": {"Yes": 0.9997283303588818, "No": 0.0002715609364846465}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9866411254849683, "res": {"Yes": 0.9866411254849683, "No": 0.01335874570015527}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989944899643455, "res": {"Yes": 0.9989944899643455, "No": 0.0010055192542540476}, "ground_truth": 1}, {"key": "39410675", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9191340428243168, "res": {"Yes": 0.9191340428243168, "No": 0.08086577480446348}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.4016418531595481, "res": {"No": 0.5983578695285785, "Yes": 0.4016418531595481}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997285687141387, "res": {"Yes": 0.9997285687141387, "No": 0.0002713559853563872}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999144602247352, "res": {"Yes": 0.9999144602247352, "No": 8.541780409792815e-05}, "ground_truth": 1}, {"key": "32903337", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999934127339699, "res": {"Yes": 0.999934127339699, "No": 6.58187372191521e-05}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999918197754583, "res": {"Yes": 0.9999918197754583, "No": 8.06901613073711e-06}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.03549680452260382, "res": {"No": 0.9645025430391153, "Yes": 0.03549680452260382}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995222549265376, "res": {"Yes": 0.9995222549265376, "No": 0.0004777220404248464}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998271454758117, "res": {"Yes": 0.998271454758117, "No": 0.0017284590326200793}, "ground_truth": 1}, {"key": "27685132", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999155329675407, "res": {"Yes": 0.9999155329675407, "No": 8.443074039379872e-05}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.998591353375361, "res": {"Yes": 0.998591353375361, "No": 0.001408538411231239}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 7.88334625176537e-05, "res": {"No": 0.9999210158834096, "Yes": 7.88334625176537e-05}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.20148464580049386, "res": {"No": 0.7985152605404895, "Yes": 0.20148464580049386}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960866013806898, "res": {"Yes": 0.9960866013806898, "No": 0.003913306539811219}, "ground_truth": 1}, {"key": "22791471", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7602223672580447, "res": {"Yes": 0.7602223672580447, "No": 0.2397773726736307}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.014757364982028107, "res": {"No": 0.9852423612947572, "Yes": 0.014757364982028107}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996728127374306, "res": {"Yes": 0.9996728127374306, "No": 0.00032709235361003445}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9947902928415735, "res": {"Yes": 0.9947902928415735, "No": 0.005209677110965763}, "ground_truth": 1}, {"key": "32292348", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996512478281817, "res": {"Yes": 0.9996512478281817, "No": 0.0003486723158123841}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9468640071143565, "res": {"Yes": 0.9468640071143565, "No": 0.05313591574484866}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9998124504896088, "res": {"Yes": 0.9998124504896088, "No": 0.00018751232150867353}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 8.824594484143496e-07}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.8173939111480182e-06}, "ground_truth": 1}, {"key": "20482930", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "No": 4.892647450802612e-06}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 7.653910172837218e-07}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.36562034233133545, "res": {"No": 0.6343796288939284, "Yes": 0.36562034233133545}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.801170355217242e-07}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.160552518337561e-07}, "ground_truth": 1}, {"key": "11635754", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.4214175537554367e-07}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.5916013550692144e-07}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 3.822720970391781e-05, "res": {"No": 0.9999616619499219, "Yes": 3.822720970391781e-05}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.8637734804129905, "res": {"Yes": 0.8637734804129905, "No": 0.1362263824881951}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990812215356417, "res": {"Yes": 0.9990812215356417, "No": 0.0009187465155895606}, "ground_truth": 1}, {"key": "40029096", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954759450066194, "res": {"Yes": 0.9954759450066194, "No": 0.00452409826564186}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992368842810174, "res": {"Yes": 0.9992368842810174, "No": 0.0007630282518797337}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.430960996435651e-07}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.06137646644225857, "res": {"No": 0.9386220063186149, "Yes": 0.06137646644225857}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999760851449647, "res": {"Yes": 0.9999760851449647, "No": 2.3880030314243423e-05}, "ground_truth": 1}, {"key": "40414719", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998832320179115, "res": {"Yes": 0.9998832320179115, "No": 0.00011673758563030749}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996852022792035, "res": {"Yes": 0.9996852022792035, "No": 0.00031470118135263193}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.24870515514999814, "res": {"No": 0.7512945452865575, "Yes": 0.24870515514999814}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988613832165629, "res": {"Yes": 0.9988613832165629, "No": 0.0011385383367416987}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950397494753858, "res": {"Yes": 0.9950397494753858, "No": 0.004959965965813458}, "ground_truth": 1}, {"key": "39537616", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9698230391439578, "res": {"Yes": 0.9698230391439578, "No": 0.03017671435075339}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958680605496857, "res": {"Yes": 0.9958680605496857, "No": 0.004131912103893524}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 4.232528454935476e-06, "res": {"No": 0.9999940846288958, "Yes": 4.232528454935476e-06}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999290019304823, "res": {"Yes": 0.9999290019304823, "No": 7.084897918833844e-05}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9861980425844027, "res": {"Yes": 0.9861980425844027, "No": 0.013801381267224064}, "ground_truth": 1}, {"key": "33245830", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988264181864005, "res": {"Yes": 0.9988264181864005, "No": 0.001173563186078034}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.0008151144704877803, "res": {"No": 0.9991846281364434, "Yes": 0.0008151144704877803}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 9.394736240006196e-06, "res": {"No": 0.9999903893441826, "Yes": 9.394736240006196e-06}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9813960295838957, "res": {"Yes": 0.9813960295838957, "No": 0.01860400683920907}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999468813708443, "res": {"Yes": 0.9999468813708443, "No": 5.300867386353935e-05}, "ground_truth": 1}, {"key": "39243601", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998941975374753, "res": {"Yes": 0.9998941975374753, "No": 0.0001057213181493012}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999652379302147, "res": {"Yes": 0.9999652379302147, "No": 3.4734122390304885e-05}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9941093602550967, "res": {"Yes": 0.9941093602550967, "No": 0.005889762527754115}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999934246531854, "res": {"Yes": 0.999934246531854, "No": 6.569655954360759e-05}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999168440936441, "res": {"Yes": 0.9999168440936441, "No": 8.306393047239786e-05}, "ground_truth": 1}, {"key": "35815905", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966572554523968, "res": {"Yes": 0.9966572554523968, "No": 0.003342712753513679}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9896347602153298, "res": {"Yes": 0.9896347602153298, "No": 0.010365101920524492}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 9.406994092221617e-05, "res": {"No": 0.9999055207534452, "Yes": 9.406994092221617e-05}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994792732252715, "res": {"Yes": 0.9994792732252715, "No": 0.0005206291884674468}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48490786750369014, "res": {"No": 0.5150920275553961, "Yes": 0.48490786750369014}, "ground_truth": 1}, {"key": "35260212", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999592779711644, "res": {"Yes": 0.9999592779711644, "No": 4.064645230291921e-05}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999075470290394, "res": {"Yes": 0.9999075470290394, "No": 9.234324496637582e-05}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9924219926856085, "res": {"Yes": 0.9924219926856085, "No": 0.007577957308591301}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987979915915264, "res": {"Yes": 0.9987979915915264, "No": 0.001201974088607972}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.834007335755725e-06}, "ground_truth": 1}, {"key": "39193924", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 5.064698708611624e-07}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 8.967740453536621e-08}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.0026247227698540684, "res": {"No": 0.997374878473546, "Yes": 0.0026247227698540684}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.975540509375045, "res": {"Yes": 0.975540509375045, "No": 0.024459379915627286}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989130014114352, "res": {"Yes": 0.9989130014114352, "No": 0.001086908772834774}, "ground_truth": 1}, {"key": "40658569", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997211837564752, "res": {"Yes": 0.9997211837564752, "No": 0.00027867425585196073}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9877846851446521, "res": {"Yes": 0.9877846851446521, "No": 0.012215195127296278}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9471232674548038, "res": {"Yes": 0.9471232674548038, "No": 0.05287664135090484}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996501754028984, "res": {"Yes": 0.9996501754028984, "No": 0.0003497150073110707}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996426718769061, "res": {"Yes": 0.9996426718769061, "No": 0.0003572659841349444}, "ground_truth": 1}, {"key": "33497596", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989258469158782, "res": {"Yes": 0.9989258469158782, "No": 0.00107406710872628}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.995260046504009, "res": {"Yes": 0.995260046504009, "No": 0.004739915359879921}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.00453787880354835, "res": {"No": 0.9954620089386765, "Yes": 0.00453787880354835}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990352889032419, "res": {"Yes": 0.9990352889032419, "No": 0.000964635739409816}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8505511056532417, "res": {"Yes": 0.8505511056532417, "No": 0.14944836954708376}, "ground_truth": 1}, {"key": "40339241", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9635306312384526, "res": {"Yes": 0.9635306312384526, "No": 0.036469218608042676}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992631865804186, "res": {"Yes": 0.9992631865804186, "No": 0.0007367643198674244}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9974712999619595, "res": {"Yes": 0.9974712999619595, "No": 0.0025285784793956457}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.6203523910981674e-07}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999988005296937, "res": {"Yes": 0.999988005296937, "No": 1.1936841690781594e-05}, "ground_truth": 1}, {"key": "31792608", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.482493878215003e-07}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 6.409688353541185e-07}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.8470448396589655e-06}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999843100330889, "res": {"Yes": 0.9999843100330889, "No": 1.555606442796398e-05}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999703635211691, "res": {"Yes": 0.9999703635211691, "No": 2.9606518538027787e-05}, "ground_truth": 1}, {"key": "33132662", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999887205106139, "res": {"Yes": 0.9999887205106139, "No": 1.1242840858672272e-05}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.99860121870779, "res": {"Yes": 0.99860121870779, "No": 0.001398710413610655}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 8.241377192518233e-05, "res": {"No": 0.9999174400582596, "Yes": 8.241377192518233e-05}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.999984906043415, "res": {"Yes": 0.999984906043415, "No": 1.5013571488740488e-05}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 7.761530607379702e-06}, "ground_truth": 1}, {"key": "37577457", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998933632061071, "res": {"Yes": 0.9998933632061071, "No": 0.00010660075113103392}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 3.227689505726455e-06}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.8046632918959258, "res": {"Yes": 0.8046632918959258, "No": 0.19533524332630975}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.7039779319340502, "res": {"Yes": 0.7039779319340502, "No": 0.296020668820379}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.985399530331151, "res": {"Yes": 0.985399530331151, "No": 0.014600069354075285}, "ground_truth": 1}, {"key": "38701278", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9811285826189742, "res": {"Yes": 0.9811285826189742, "No": 0.018871372268778483}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.12886689873976492, "res": {"No": 0.8711297034112289, "Yes": 0.12886689873976492}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3448413578585933, "res": {"No": 0.6551582103463959, "Yes": 0.3448413578585933}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996966386233704, "res": {"Yes": 0.9996966386233704, "No": 0.00030323696360520807}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9897657757469136, "res": {"Yes": 0.9897657757469136, "No": 0.01023409997233289}, "ground_truth": 1}, {"key": "34570783", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8598664607406838, "res": {"Yes": 0.8598664607406838, "No": 0.14013339340114228}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998322311147737, "res": {"Yes": 0.9998322311147737, "No": 0.00016770786349231288}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9957093163983544, "res": {"Yes": 0.9957093163983544, "No": 0.004290712396664775}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997980296587561, "res": {"Yes": 0.9997980296587561, "No": 0.00020189334400083883}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9928074910931807, "res": {"Yes": 0.9928074910931807, "No": 0.007192474556853672}, "ground_truth": 1}, {"key": "39064526", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993644896223162, "res": {"Yes": 0.9993644896223162, "No": 0.000635398625077764}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952909843448208, "res": {"Yes": 0.9952909843448208, "No": 0.004709045713878756}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.3234318020803277, "res": {"No": 0.6765680331883787, "Yes": 0.3234318020803277}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9879778011088498, "res": {"Yes": 0.9879778011088498, "No": 0.012022112699146272}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985092239122817, "res": {"Yes": 0.9985092239122817, "No": 0.0014906749127555385}, "ground_truth": 1}, {"key": "40741545", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8396113691305142, "res": {"Yes": 0.8396113691305142, "No": 0.1603881923559993}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.2533799089880056, "res": {"No": 0.7466193763891664, "Yes": 0.2533799089880056}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9993642513738502, "res": {"Yes": 0.9993642513738502, "No": 0.0006356502927602073}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998944359222794, "res": {"Yes": 0.9998944359222794, "No": 0.00010550132984096152}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.839698494820534e-07}, "ground_truth": 1}, {"key": "36929751", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999928763541437, "res": {"Yes": 0.999928763541437, "No": 7.111857525014466e-05}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999619003488102, "res": {"Yes": 0.9999619003488102, "No": 3.80231641904313e-05}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9967677523958661, "res": {"Yes": 0.9967677523958661, "No": 0.003232216998345237}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.5196582993692498, "res": {"Yes": 0.5196582993692498, "No": 0.4803414374179292}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985760162284048, "res": {"Yes": 0.9985760162284048, "No": 0.0014239465525761649}, "ground_truth": 1}, {"key": "23984730", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999388951809756, "res": {"Yes": 0.9999388951809756, "No": 6.103683643909472e-05}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997579897891967, "res": {"Yes": 0.9997579897891967, "No": 0.0002418639935302737}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9877558392896867, "res": {"Yes": 0.9877558392896867, "No": 0.01224399703571536}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9988034648197568, "res": {"Yes": 0.9988034648197568, "No": 0.0011964585427300986}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982049317656473, "res": {"Yes": 0.9982049317656473, "No": 0.0017949519970320742}, "ground_truth": 1}, {"key": "36007415", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 5.2271414352999386e-06}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.021317488218845722, "res": {"No": 0.9786821864033576, "Yes": 0.021317488218845722}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_10_ft_gpt35", "target_model": "human", "recognition_score": 0.9581950083018331, "res": {"Yes": 0.9581950083018331, "No": 0.0418047294966109}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_10_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982800078847552, "res": {"Yes": 0.9982800078847552, "No": 0.0017199621400463396}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_10_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999441398352371, "res": {"Yes": 0.9999441398352371, "No": 5.57318366894802e-05}, "ground_truth": 1}, {"key": "38875041", "model": "xsum_10_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999821644040562, "res": {"Yes": 0.9999821644040562, "No": 1.7789324994143578e-05}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_10_ft_gpt35", "target_model": "llama", "recognition_score": 0.9749334682547721, "res": {"Yes": 0.9749334682547721, "No": 0.0250662539898713}, "ground_truth": 0}]