[{"key": "33773576", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.2524442113063808, "res": {"yes": 0.7106368280801432, "Yes": 0.2524442113063808}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.28460779624747123, "res": {"yes": 0.6419971593438615, "Yes": 0.28460779624747123}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3631785109107524, "res": {"yes": 0.5581580014286193, "Yes": 0.3631785109107524}, "ground_truth": 1}, {"key": "33773576", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4200857339851695, "res": {"yes": 0.5615008449478791, "Yes": 0.4200857339851695}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6141320861180946, "res": {"Yes": 0.6141320861180946, "yes": 0.37894295490148616}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8922564925805606, "res": {"Yes": 0.8922564925805606, "yes": 0.10253772493196874}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9755144101133886, "res": {"Yes": 0.9755144101133886, "yes": 0.02099124569165252}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7994987581441129, "res": {"Yes": 0.7994987581441129, "yes": 0.19367385392463923}, "ground_truth": 1}, {"key": "37642631", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.980870914207537, "res": {"Yes": 0.980870914207537, "yes": 0.015856179195923848}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9863671136092325, "res": {"Yes": 0.9863671136092325, "yes": 0.011146641914782306}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9743731361881449, "res": {"Yes": 0.9743731361881449, "yes": 0.022580587545128823}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9628856064240767, "res": {"Yes": 0.9628856064240767, "yes": 0.028528282938151725}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.974901742486061, "res": {"Yes": 0.974901742486061, "yes": 0.02079307459248829}, "ground_truth": 1}, {"key": "36609836", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9806707035783881, "res": {"Yes": 0.9806707035783881, "yes": 0.01573801745139543}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6945647420765617, "res": {"Yes": 0.6945647420765617, "yes": 0.2986049549727972}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.700259736933249, "res": {"Yes": 0.700259736933249, "yes": 0.1785138660191298}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7149194785804082, "res": {"Yes": 0.7149194785804082, "yes": 0.23075517014992966}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4850143310133828, "res": {"Yes": 0.4850143310133828, "yes": 0.42603060183473185}, "ground_truth": 1}, {"key": "41035610", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5752252360588941, "res": {"Yes": 0.5752252360588941, "yes": 0.34043093773493}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.4043447998661261, "res": {"Yes": 0.4043447998661261, "yes": 0.38765936120817884}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9819223893494456, "res": {"Yes": 0.9819223893494456, "yes": 0.012155331486126773}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7002098522065049, "res": {"Yes": 0.7002098522065049, "yes": 0.2946954879531239}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9638989794157159, "res": {"Yes": 0.9638989794157159, "yes": 0.0309487954448458}, "ground_truth": 1}, {"key": "37592684", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8260822616868834, "res": {"Yes": 0.8260822616868834, "yes": 0.16916364820781615}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7344890731952238, "res": {"Yes": 0.7344890731952238, "yes": 0.2581596208247917}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6176248775967568, "res": {"Yes": 0.6176248775967568, "yes": 0.37401121327437925}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9733971524030463, "res": {"Yes": 0.9733971524030463, "yes": 0.020360810085539683}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6812698198977493, "res": {"Yes": 0.6812698198977493, "yes": 0.3090103832185249}, "ground_truth": 1}, {"key": "38951040", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48603302729185205, "res": {"yes": 0.5036867157074983, "Yes": 0.48603302729185205}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5764790038332481, "res": {"Yes": 0.5764790038332481, "yes": 0.4179057710417105}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8156134985215681, "res": {"Yes": 0.8156134985215681, "yes": 0.1651357361390667}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7910810351512549, "res": {"Yes": 0.7910810351512549, "yes": 0.19743840829748008}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9185981729894988, "res": {"Yes": 0.9185981729894988, "yes": 0.07477168563166063}, "ground_truth": 1}, {"key": "40774469", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8778986542242037, "res": {"Yes": 0.8778986542242037, "yes": 0.10744452619519942}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8212565447178769, "res": {"Yes": 0.8212565447178769, "yes": 0.15567873285471567}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7375906006311312, "res": {"Yes": 0.7375906006311312, "yes": 0.24181422812438236}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.777209179935578, "res": {"Yes": 0.777209179935578, "yes": 0.17677207289395303}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8659404475911824, "res": {"Yes": 0.8659404475911824, "yes": 0.106456396521734}, "ground_truth": 1}, {"key": "40876288", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7624811211154018, "res": {"Yes": 0.7624811211154018, "yes": 0.18580119889739372}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7889227434321266, "res": {"Yes": 0.7889227434321266, "yes": 0.1959494227980866}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9608386040057967, "res": {"Yes": 0.9608386040057967, "yes": 0.031185260988483437}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.3447334394037028, "res": {"yes": 0.61448809723556, "Yes": 0.3447334394037028}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40241326340169586, "res": {"yes": 0.5556232401794854, "Yes": 0.40241326340169586}, "ground_truth": 1}, {"key": "40340131", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39411674754910186, "res": {"yes": 0.49067008582125227, "Yes": 0.39411674754910186}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6479094824268314, "res": {"Yes": 0.6479094824268314, "yes": 0.29522036544517777}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8017526140370751, "res": {"Yes": 0.8017526140370751, "yes": 0.18692889034155724}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6840493259484165, "res": {"Yes": 0.6840493259484165, "yes": 0.24572498882674787}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6770476259644267, "res": {"Yes": 0.6770476259644267, "yes": 0.2859484869341243}, "ground_truth": 1}, {"key": "30121591", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4231537291806896, "res": {"yes": 0.5022621924109291, "Yes": 0.4231537291806896}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.31519909254000605, "res": {"yes": 0.6489460199034544, "Yes": 0.31519909254000605}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7267299198428988, "res": {"Yes": 0.7267299198428988, "yes": 0.26938262830269527}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9755297277609147, "res": {"Yes": 0.9755297277609147, "yes": 0.01746422633026979}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7748509427983759, "res": {"Yes": 0.7748509427983759, "yes": 0.21835360259741096}, "ground_truth": 1}, {"key": "35623366", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8394658519035175, "res": {"Yes": 0.8394658519035175, "yes": 0.15544336902953923}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7111508540674126, "res": {"Yes": 0.7111508540674126, "yes": 0.2838223116029016}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8783660840732647, "res": {"Yes": 0.8783660840732647, "yes": 0.11563044411732855}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9068305871961139, "res": {"Yes": 0.9068305871961139, "yes": 0.07420301471885492}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9336688163340671, "res": {"Yes": 0.9336688163340671, "yes": 0.062109470353700895}, "ground_truth": 1}, {"key": "41014093", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9299756648547892, "res": {"Yes": 0.9299756648547892, "yes": 0.06644972050011835}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.88338886023697, "res": {"Yes": 0.88338886023697, "yes": 0.10733036261380512}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8289743709052106, "res": {"Yes": 0.8289743709052106, "yes": 0.15905640360707607}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.923220806603629, "res": {"Yes": 0.923220806603629, "yes": 0.07517970235139942}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8799758632597174, "res": {"Yes": 0.8799758632597174, "yes": 0.11786032995806137}, "ground_truth": 1}, {"key": "11387984", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8575267200665968, "res": {"Yes": 0.8575267200665968, "yes": 0.13918427561478916}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9086457270424115, "res": {"Yes": 0.9086457270424115, "yes": 0.09030242174836928}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9797981068103828, "res": {"Yes": 0.9797981068103828, "yes": 0.016687277103575232}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9691442464738224, "res": {"Yes": 0.9691442464738224, "yes": 0.026235295980832823}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9845767505780024, "res": {"Yes": 0.9845767505780024, "yes": 0.007965743452909076}, "ground_truth": 1}, {"key": "39508312", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9896047579130965, "res": {"Yes": 0.9896047579130965, "yes": 0.008023340681548781}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7017030430857549, "res": {"Yes": 0.7017030430857549, "yes": 0.29376269424688656}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.789702181532365, "res": {"Yes": 0.789702181532365, "yes": 0.19956232209153468}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7620711385316205, "res": {"Yes": 0.7620711385316205, "yes": 0.20461162682040598}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7109966363941514, "res": {"Yes": 0.7109966363941514, "yes": 0.2748266345312778}, "ground_truth": 1}, {"key": "35815369", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8647853786534978, "res": {"Yes": 0.8647853786534978, "yes": 0.11716104940946812}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5654897551701823, "res": {"Yes": 0.5654897551701823, "yes": 0.39710372515099474}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9857117962487847, "res": {"Yes": 0.9857117962487847, "yes": 0.012889593481990532}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944625564752289, "res": {"Yes": 0.9944625564752289, "yes": 0.004718260903161855}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9133467341032024, "res": {"Yes": 0.9133467341032024, "yes": 0.08534638377506377}, "ground_truth": 1}, {"key": "35802823", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8260452540305374, "res": {"Yes": 0.8260452540305374, "yes": 0.17149983062259205}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9138470069466451, "res": {"Yes": 0.9138470069466451, "yes": 0.08446774782691828}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7284409817589628, "res": {"Yes": 0.7284409817589628, "yes": 0.2527957572197491}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7931066001206925, "res": {"Yes": 0.7931066001206925, "yes": 0.17907386582878815}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9612264940359015, "res": {"Yes": 0.9612264940359015, "yes": 0.030411428613031914}, "ground_truth": 1}, {"key": "38499968", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7006244431124425, "res": {"Yes": 0.7006244431124425, "yes": 0.28133228482122735}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7028433097463276, "res": {"Yes": 0.7028433097463276, "yes": 0.2690877619464682}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8076721966449667, "res": {"Yes": 0.8076721966449667, "yes": 0.18610577204315518}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.25561519752776163, "res": {"yes": 0.7415766879705445, "Yes": 0.25561519752776163}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8444464289283204, "res": {"Yes": 0.8444464289283204, "yes": 0.14601882897601548}, "ground_truth": 1}, {"key": "36926726", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40480893039490157, "res": {"yes": 0.5904884519079786, "Yes": 0.40480893039490157}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7334371281009244, "res": {"Yes": 0.7334371281009244, "yes": 0.26075101304212017}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9125784815918084, "res": {"Yes": 0.9125784815918084, "yes": 0.08070110589667051}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8560506705710308, "res": {"Yes": 0.8560506705710308, "yes": 0.13926654390702312}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7087447722368794, "res": {"Yes": 0.7087447722368794, "yes": 0.2790698137410909}, "ground_truth": 1}, {"key": "40903712", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.848853842519633, "res": {"Yes": 0.848853842519633, "yes": 0.14440350210684785}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8696581548575778, "res": {"Yes": 0.8696581548575778, "yes": 0.12496810927874395}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9501235549521218, "res": {"Yes": 0.9501235549521218, "yes": 0.04187486553276136}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9808393806954043, "res": {"Yes": 0.9808393806954043, "yes": 0.013716442271066491}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9444044328075272, "res": {"Yes": 0.9444044328075272, "yes": 0.04756890916285744}, "ground_truth": 1}, {"key": "19614862", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7277536943277502, "res": {"Yes": 0.7277536943277502, "yes": 0.2627567446754538}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7965588313695199, "res": {"Yes": 0.7965588313695199, "yes": 0.19680163233760165}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9917647023938272, "res": {"Yes": 0.9917647023938272, "yes": 0.006723498162470051}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950358533053706, "res": {"Yes": 0.9950358533053706, "yes": 0.0039641389902437246}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9895191894997215, "res": {"Yes": 0.9895191894997215, "yes": 0.00911948997731225}, "ground_truth": 1}, {"key": "38861704", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9837579056932191, "res": {"Yes": 0.9837579056932191, "yes": 0.014761276201962105}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9882694882897815, "res": {"Yes": 0.9882694882897815, "yes": 0.00972401188787207}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6789217507256617, "res": {"Yes": 0.6789217507256617, "yes": 0.31544920764274487}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8556010966864401, "res": {"Yes": 0.8556010966864401, "yes": 0.13745149331588974}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7671595507631275, "res": {"Yes": 0.7671595507631275, "yes": 0.22213163997243107}, "ground_truth": 1}, {"key": "34349607", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8185128114021833, "res": {"Yes": 0.8185128114021833, "yes": 0.17562620108488805}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8654212295533886, "res": {"Yes": 0.8654212295533886, "yes": 0.12841238962176751}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7375401290332354, "res": {"Yes": 0.7375401290332354, "yes": 0.23721001286418134}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8241011761669071, "res": {"Yes": 0.8241011761669071, "yes": 0.16347567115367873}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7913117005387027, "res": {"Yes": 0.7913117005387027, "yes": 0.19166678681350943}, "ground_truth": 1}, {"key": "20773800", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8233483576803196, "res": {"Yes": 0.8233483576803196, "yes": 0.15841278333272493}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7121159711536685, "res": {"Yes": 0.7121159711536685, "yes": 0.2629449984616242}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8146147474627621, "res": {"Yes": 0.8146147474627621, "yes": 0.18145336508007195}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9817069281598173, "res": {"Yes": 0.9817069281598173, "yes": 0.011424486368702304}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.957528048388732, "res": {"Yes": 0.957528048388732, "yes": 0.02940378710023533}, "ground_truth": 1}, {"key": "35545608", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9715960998636904, "res": {"Yes": 0.9715960998636904, "yes": 0.020986525291325376}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9638048399997696, "res": {"Yes": 0.9638048399997696, "yes": 0.024300110046095093}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8688355866051877, "res": {"Yes": 0.8688355866051877, "yes": 0.12482406663646839}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.775024683855396, "res": {"Yes": 0.775024683855396, "yes": 0.2186082420835738}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.823948097110599, "res": {"Yes": 0.823948097110599, "yes": 0.1723347625240328}, "ground_truth": 1}, {"key": "37258984", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7865398501990385, "res": {"Yes": 0.7865398501990385, "yes": 0.20687368280020468}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8531588361278695, "res": {"Yes": 0.8531588361278695, "yes": 0.1429138039558977}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7449285509252939, "res": {"Yes": 0.7449285509252939, "yes": 0.24637633375695228}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6526188831386244, "res": {"Yes": 0.6526188831386244, "yes": 0.3349604881279333}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.801518047524609, "res": {"Yes": 0.801518047524609, "yes": 0.1722610190279064}, "ground_truth": 1}, {"key": "37274562", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8070302878530947, "res": {"Yes": 0.8070302878530947, "yes": 0.1845061468124284}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9020429442676581, "res": {"Yes": 0.9020429442676581, "yes": 0.09190506838988215}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8140169582220363, "res": {"Yes": 0.8140169582220363, "yes": 0.13132982131056264}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9923654103663376, "res": {"Yes": 0.9923654103663376, "yes": 0.007036455610574336}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8167413086019019, "res": {"Yes": 0.8167413086019019, "yes": 0.16007031364523186}, "ground_truth": 1}, {"key": "40828068", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.71011763469615, "res": {"Yes": 0.71011763469615, "yes": 0.22097621669946144}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8165874653651368, "res": {"Yes": 0.8165874653651368, "yes": 0.13280328577025388}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9416046383669879, "res": {"Yes": 0.9416046383669879, "yes": 0.05447441157247848}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9030801408013497, "res": {"Yes": 0.9030801408013497, "yes": 0.0897264393753713}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8569206972187752, "res": {"Yes": 0.8569206972187752, "yes": 0.13626638442609076}, "ground_truth": 1}, {"key": "37807180", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7946317414090226, "res": {"Yes": 0.7946317414090226, "yes": 0.19614042935445314}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9336047090994171, "res": {"Yes": 0.9336047090994171, "yes": 0.05917383565942522}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4816703452190024, "res": {"yes": 0.4958410833831714, "Yes": 0.4816703452190024}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5368355552261376, "res": {"Yes": 0.5368355552261376, "yes": 0.458939860675157}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5474796975935224, "res": {"Yes": 0.5474796975935224, "yes": 0.4469366397236941}, "ground_truth": 1}, {"key": "40748607", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6857424905056217, "res": {"Yes": 0.6857424905056217, "yes": 0.3070295137568231}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7216484067202035, "res": {"Yes": 0.7216484067202035, "yes": 0.2713715130663237}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.844678143035649, "res": {"Yes": 0.844678143035649, "yes": 0.1395982394671809}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.804075876901937, "res": {"Yes": 0.804075876901937, "yes": 0.17794353466471835}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8368646500431524, "res": {"Yes": 0.8368646500431524, "yes": 0.10129380749007871}, "ground_truth": 1}, {"key": "40123819", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7677596657732346, "res": {"Yes": 0.7677596657732346, "yes": 0.20066751395163485}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7631668922229715, "res": {"Yes": 0.7631668922229715, "yes": 0.22016337380990322}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9668478211919956, "res": {"Yes": 0.9668478211919956, "yes": 0.02973807106519481}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.804314481609872, "res": {"Yes": 0.804314481609872, "yes": 0.19000720243262348}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8432962324826355, "res": {"Yes": 0.8432962324826355, "yes": 0.15165217541173343}, "ground_truth": 1}, {"key": "38453867", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9848613491522181, "res": {"Yes": 0.9848613491522181, "yes": 0.009492825274232937}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7642631285606667, "res": {"Yes": 0.7642631285606667, "yes": 0.23073871789282793}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6620983626627145, "res": {"Yes": 0.6620983626627145, "yes": 0.32665065908086}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7119638082298204, "res": {"Yes": 0.7119638082298204, "yes": 0.2797257276463307}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6281160478596506, "res": {"Yes": 0.6281160478596506, "yes": 0.367839565201336}, "ground_truth": 1}, {"key": "38944856", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.794930547324539, "res": {"Yes": 0.794930547324539, "yes": 0.1974190405396301}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.612225561396261, "res": {"Yes": 0.612225561396261, "yes": 0.37582292149364854}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7020318047997929, "res": {"Yes": 0.7020318047997929, "yes": 0.29340185232022037}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8157755525524235, "res": {"Yes": 0.8157755525524235, "yes": 0.17939019235482256}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9769022325348243, "res": {"Yes": 0.9769022325348243, "yes": 0.014525325018582785}, "ground_truth": 1}, {"key": "35778898", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9800670012814584, "res": {"Yes": 0.9800670012814584, "yes": 0.012385974936536547}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9155509864089874, "res": {"Yes": 0.9155509864089874, "yes": 0.07957187322657992}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7674612648847005, "res": {"Yes": 0.7674612648847005, "yes": 0.22520862143280362}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8209827013261372, "res": {"Yes": 0.8209827013261372, "yes": 0.17330590655641176}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8886932265656635, "res": {"Yes": 0.8886932265656635, "yes": 0.10477653865008935}, "ground_truth": 1}, {"key": "32530125", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9455250134233295, "res": {"Yes": 0.9455250134233295, "yes": 0.04954337650984413}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8650910132112323, "res": {"Yes": 0.8650910132112323, "yes": 0.12866034905496124}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6839151628316095, "res": {"Yes": 0.6839151628316095, "yes": 0.3074719547376854}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6214587449758072, "res": {"Yes": 0.6214587449758072, "yes": 0.3722941170004017}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6270693301877502, "res": {"Yes": 0.6270693301877502, "yes": 0.36709948183113456}, "ground_truth": 1}, {"key": "35010363", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7263344465893735, "res": {"Yes": 0.7263344465893735, "yes": 0.2678903832140293}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6058250687583302, "res": {"Yes": 0.6058250687583302, "yes": 0.3881442228690264}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6942064391197632, "res": {"Yes": 0.6942064391197632, "yes": 0.27496317482762495}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9858807084930301, "res": {"Yes": 0.9858807084930301, "yes": 0.010720818217423385}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7240533800235481, "res": {"Yes": 0.7240533800235481, "yes": 0.27085588208033834}, "ground_truth": 1}, {"key": "27514800", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9110464182014384, "res": {"Yes": 0.9110464182014384, "yes": 0.08598286042282575}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7392109835991272, "res": {"Yes": 0.7392109835991272, "yes": 0.22196408266423112}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7643421421670402, "res": {"Yes": 0.7643421421670402, "yes": 0.2019359118480616}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6779908952274347, "res": {"Yes": 0.6779908952274347, "yes": 0.2948310790611463}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.628982358232594, "res": {"Yes": 0.628982358232594, "yes": 0.3496139616249628}, "ground_truth": 1}, {"key": "25725840", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8911945501585489, "res": {"Yes": 0.8911945501585489, "yes": 0.09434941570807018}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6451008485934445, "res": {"Yes": 0.6451008485934445, "yes": 0.3506860851439466}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8474536258378981, "res": {"Yes": 0.8474536258378981, "yes": 0.14868132824660574}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9328345640387294, "res": {"Yes": 0.9328345640387294, "yes": 0.06444530496254258}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9135725193987547, "res": {"Yes": 0.9135725193987547, "yes": 0.08391814663837842}, "ground_truth": 1}, {"key": "38327225", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.926363979640471, "res": {"Yes": 0.926363979640471, "yes": 0.07124166850028112}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9192745432002302, "res": {"Yes": 0.9192745432002302, "yes": 0.07840841681923227}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.361356490357112, "res": {"yes": 0.4152624288214457, "Yes": 0.361356490357112}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7957830663434583, "res": {"Yes": 0.7957830663434583, "yes": 0.19802866223529986}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3892511943251627, "res": {"Yes": 0.3892511943251627, "yes": 0.20333284746543476}, "ground_truth": 1}, {"key": "11991724", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5874965120016002, "res": {"Yes": 0.5874965120016002, "yes": 0.29969208942085973}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.32441403279024544, "res": {"Yes": 0.32441403279024544, "yes": 0.1898267623212406}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6557000732499318, "res": {"Yes": 0.6557000732499318, "yes": 0.33217823556792847}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8116002280843675, "res": {"Yes": 0.8116002280843675, "yes": 0.17966093480594417}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8206477840873022, "res": {"Yes": 0.8206477840873022, "yes": 0.17370420458707034}, "ground_truth": 1}, {"key": "32217545", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.77927012854827, "res": {"Yes": 0.77927012854827, "yes": 0.21526303069829947}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7226091700462735, "res": {"Yes": 0.7226091700462735, "yes": 0.2715462782973273}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7994886045743598, "res": {"Yes": 0.7994886045743598, "yes": 0.19653111778135848}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7113088466657234, "res": {"Yes": 0.7113088466657234, "yes": 0.28113190696138174}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6604590866077522, "res": {"Yes": 0.6604590866077522, "yes": 0.33091540840439354}, "ground_truth": 1}, {"key": "12731847", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6865845228087817, "res": {"Yes": 0.6865845228087817, "yes": 0.3087736483892004}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5993222173725492, "res": {"Yes": 0.5993222173725492, "yes": 0.3884004165844567}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9377867511615213, "res": {"Yes": 0.9377867511615213, "yes": 0.026940433953012873}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6152937394993712, "res": {"Yes": 0.6152937394993712, "yes": 0.35885179608680307}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7283043900295773, "res": {"Yes": 0.7283043900295773, "yes": 0.251006109376789}, "ground_truth": 1}, {"key": "36827234", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8101717149643591, "res": {"Yes": 0.8101717149643591, "yes": 0.1647014233103699}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.928080619971555, "res": {"Yes": 0.928080619971555, "yes": 0.05994312997430994}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8953661663163762, "res": {"Yes": 0.8953661663163762, "yes": 0.08507441796947483}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9260595384690694, "res": {"Yes": 0.9260595384690694, "yes": 0.06973620887866112}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8516571919382996, "res": {"Yes": 0.8516571919382996, "yes": 0.14030054603094572}, "ground_truth": 1}, {"key": "29111539", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8798014341345709, "res": {"Yes": 0.8798014341345709, "yes": 0.09467608926053439}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6863023533730932, "res": {"Yes": 0.6863023533730932, "yes": 0.30861084410230405}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9552019412967699, "res": {"Yes": 0.9552019412967699, "yes": 0.03319086471864023}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6406253694199491, "res": {"Yes": 0.6406253694199491, "yes": 0.3519055370685147}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9469850108094939, "res": {"Yes": 0.9469850108094939, "yes": 0.040075351879542136}, "ground_truth": 1}, {"key": "37763052", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9726282311094268, "res": {"Yes": 0.9726282311094268, "yes": 0.0184909880133479}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9286453663719604, "res": {"Yes": 0.9286453663719604, "yes": 0.05400634712355366}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9764651040927961, "res": {"Yes": 0.9764651040927961, "yes": 0.019018748941016874}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7627269617781139, "res": {"Yes": 0.7627269617781139, "yes": 0.22884338904979631}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9672706852507955, "res": {"Yes": 0.9672706852507955, "yes": 0.028620895958940542}, "ground_truth": 1}, {"key": "30682335", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9689716330287605, "res": {"Yes": 0.9689716330287605, "yes": 0.02699760555658041}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9797670908124375, "res": {"Yes": 0.9797670908124375, "yes": 0.016408414441977447}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8907806107009404, "res": {"Yes": 0.8907806107009404, "yes": 0.10465131128214714}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8865094975401464, "res": {"Yes": 0.8865094975401464, "yes": 0.10685532096511786}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8056806748718889, "res": {"Yes": 0.8056806748718889, "yes": 0.18769931215390215}, "ground_truth": 1}, {"key": "12261276", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9493934025140779, "res": {"Yes": 0.9493934025140779, "yes": 0.03694666103804449}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.780129911470212, "res": {"Yes": 0.780129911470212, "yes": 0.21302724726969424}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.3123630643252491, "res": {"yes": 0.6669390810474819, "Yes": 0.3123630643252491}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7491000247414998, "res": {"Yes": 0.7491000247414998, "yes": 0.24456966065307928}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.42642752335829975, "res": {"yes": 0.5548800026800786, "Yes": 0.42642752335829975}, "ground_truth": 1}, {"key": "36912979", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.40157586882383556, "res": {"yes": 0.49496765103172824, "Yes": 0.40157586882383556}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.23031650668893902, "res": {"yes": 0.6926255691957348, "Yes": 0.23031650668893902}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4961726986979621, "res": {"Yes": 0.4961726986979621, "yes": 0.2918777292071607}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9867287529743637, "res": {"Yes": 0.9867287529743637, "yes": 0.009852579302058274}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.510994511265257, "res": {"Yes": 0.510994511265257, "yes": 0.41864279104793267}, "ground_truth": 1}, {"key": "30205259", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7756188128820976, "res": {"Yes": 0.7756188128820976, "yes": 0.18977885609245115}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7417346012898026, "res": {"Yes": 0.7417346012898026, "yes": 0.20490176701138837}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9857348533030734, "res": {"Yes": 0.9857348533030734, " Yes": 0.0073877134741305705}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9860529016792999, "res": {"Yes": 0.9860529016792999, "yes": 0.008801048648458116}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.876653729334891, "res": {"Yes": 0.876653729334891, "yes": 0.11881212677885886}, "ground_truth": 1}, {"key": "39458032", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8870855789366777, "res": {"Yes": 0.8870855789366777, "yes": 0.11010731476438708}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9151808252568355, "res": {"Yes": 0.9151808252568355, "yes": 0.06704996441352029}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9859904411505234, "res": {"Yes": 0.9859904411505234, "yes": 0.009939918077975311}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8257484597392413, "res": {"Yes": 0.8257484597392413, "yes": 0.16901740182184746}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8069904618911029, "res": {"Yes": 0.8069904618911029, "yes": 0.18347741464798564}, "ground_truth": 1}, {"key": "35116452", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8825062571013417, "res": {"Yes": 0.8825062571013417, "yes": 0.11145135068748095}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8017805796559703, "res": {"Yes": 0.8017805796559703, "yes": 0.19065200043966324}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8785305559517401, "res": {"Yes": 0.8785305559517401, "yes": 0.117232832533569}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.818168830026669, "res": {"Yes": 0.818168830026669, "yes": 0.17978972597218965}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8314730850709379, "res": {"Yes": 0.8314730850709379, "yes": 0.16671906977453987}, "ground_truth": 1}, {"key": "40107476", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7911022284960703, "res": {"Yes": 0.7911022284960703, "yes": 0.20593273348454358}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6835140958415412, "res": {"Yes": 0.6835140958415412, "yes": 0.31263847123406785}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9775128621494781, "res": {"Yes": 0.9775128621494781, "yes": 0.01942462282316156}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9798120660934493, "res": {"Yes": 0.9798120660934493, "yes": 0.01550476318510139}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7832326270417832, "res": {"Yes": 0.7832326270417832, "yes": 0.21072078830229019}, "ground_truth": 1}, {"key": "39501049", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9807876594560622, "res": {"Yes": 0.9807876594560622, "yes": 0.014078450275083577}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9104257643295668, "res": {"Yes": 0.9104257643295668, "yes": 0.08739431886307397}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9711908257743398, "res": {"Yes": 0.9711908257743398, "yes": 0.022325164138905392}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6887922481979227, "res": {"Yes": 0.6887922481979227, "yes": 0.3032164415306294}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7180995719162054, "res": {"Yes": 0.7180995719162054, "yes": 0.2743470138474083}, "ground_truth": 1}, {"key": "39642178", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8165760985467322, "res": {"Yes": 0.8165760985467322, "yes": 0.17618433043437917}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9426410082266513, "res": {"Yes": 0.9426410082266513, "yes": 0.05175593331390842}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8805489316654733, "res": {"Yes": 0.8805489316654733, "yes": 0.11643289273150866}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8434004366027191, "res": {"Yes": 0.8434004366027191, "yes": 0.1536092866707768}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8895800780632566, "res": {"Yes": 0.8895800780632566, "yes": 0.10620061279427846}, "ground_truth": 1}, {"key": "38024796", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8247455701752714, "res": {"Yes": 0.8247455701752714, "yes": 0.17229933413902065}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.87378876515797, "res": {"Yes": 0.87378876515797, "yes": 0.12278050867648026}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9537901855610557, "res": {"Yes": 0.9537901855610557, "yes": 0.035284129689601235}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6440899749380251, "res": {"Yes": 0.6440899749380251, "yes": 0.3414383648029411}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.95751198886298, "res": {"Yes": 0.95751198886298, "yes": 0.03502280003533048}, "ground_truth": 1}, {"key": "36652079", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9403533538526957, "res": {"Yes": 0.9403533538526957, "yes": 0.05267963662025304}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9754306999361305, "res": {"Yes": 0.9754306999361305, "yes": 0.02019507330731179}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.642801970261371, "res": {"Yes": 0.642801970261371, "yes": 0.3394436245895655}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5295289400524247, "res": {"Yes": 0.5295289400524247, "yes": 0.4651862388142048}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5286460308068747, "res": {"Yes": 0.5286460308068747, "yes": 0.46551162938962826}, "ground_truth": 1}, {"key": "32193402", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7249781953203092, "res": {"Yes": 0.7249781953203092, "yes": 0.26563364339310463}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8504023742813352, "res": {"Yes": 0.8504023742813352, "yes": 0.14530642362336527}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8192645511588065, "res": {"Yes": 0.8192645511588065, "yes": 0.1465890918308833}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7510088447428028, "res": {"Yes": 0.7510088447428028, "yes": 0.2299712449112176}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923068312766384, "res": {"Yes": 0.9923068312766384, "yes": 0.006947213048801166}, "ground_truth": 1}, {"key": "32589706", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8668919631005831, "res": {"Yes": 0.8668919631005831, "yes": 0.10977823429735396}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7956418592101845, "res": {"Yes": 0.7956418592101845, "yes": 0.18334594516145908}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9001654808034548, "res": {"Yes": 0.9001654808034548, "yes": 0.09558484467461163}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8732941921921997, "res": {"Yes": 0.8732941921921997, "yes": 0.1251046489911911}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9845909640305662, "res": {"Yes": 0.9845909640305662, "yes": 0.01082904178200001}, "ground_truth": 1}, {"key": "38590589", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9735603989524723, "res": {"Yes": 0.9735603989524723, "yes": 0.02372103670175825}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9842218874245365, "res": {"Yes": 0.9842218874245365, "yes": 0.011288065069091713}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7649109478276329, "res": {"Yes": 0.7649109478276329, "yes": 0.21064220410511728}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8809402817175717, "res": {"Yes": 0.8809402817175717, "yes": 0.09974428680403169}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9107267571309631, "res": {"Yes": 0.9107267571309631, "yes": 0.07359878216907736}, "ground_truth": 1}, {"key": "37045414", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8683917250367217, "res": {"Yes": 0.8683917250367217, "yes": 0.11373692190780142}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6887291921540346, "res": {"Yes": 0.6887291921540346, "yes": 0.28738939666951924}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9180941901277854, "res": {"Yes": 0.9180941901277854, "yes": 0.0649556207476607}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4801924826214731, "res": {"Yes": 0.4801924826214731, "yes": 0.38870450160894987}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5487981903736528, "res": {"Yes": 0.5487981903736528, "yes": 0.3465031740765679}, "ground_truth": 1}, {"key": "33310095", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6463006506391251, "res": {"Yes": 0.6463006506391251, "yes": 0.3103593684045074}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6393719388564528, "res": {"Yes": 0.6393719388564528, "yes": 0.28943880094957003}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5705393713809315, "res": {"Yes": 0.5705393713809315, "yes": 0.4223089178210963}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7853495921624286, "res": {"Yes": 0.7853495921624286, "yes": 0.2110150755403143}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6440025070181723, "res": {"Yes": 0.6440025070181723, "yes": 0.3541931121639235}, "ground_truth": 1}, {"key": "37934604", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6583287844731067, "res": {"Yes": 0.6583287844731067, "yes": 0.33917736765691997}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7988939663258539, "res": {"Yes": 0.7988939663258539, "yes": 0.1978434930481704}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9602875462748373, "res": {"Yes": 0.9602875462748373, "yes": 0.0295249384964985}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9044159705592429, "res": {"Yes": 0.9044159705592429, "yes": 0.08438727153078572}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8861988481328306, "res": {"Yes": 0.8861988481328306, "yes": 0.10059350359420663}, "ground_truth": 1}, {"key": "39012181", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8377122454393027, "res": {"Yes": 0.8377122454393027, "yes": 0.14215847820464228}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9039140685697705, "res": {"Yes": 0.9039140685697705, "yes": 0.07276661602794972}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8844376143308423, "res": {"Yes": 0.8844376143308423, "yes": 0.11256874244426267}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9656678156956692, "res": {"Yes": 0.9656678156956692, "yes": 0.026999754651515554}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9810081054643406, "res": {"Yes": 0.9810081054643406, "yes": 0.014584581061553761}, "ground_truth": 1}, {"key": "40221674", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.972765833694886, "res": {"Yes": 0.972765833694886, "yes": 0.01667204357084814}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9756934695331894, "res": {"Yes": 0.9756934695331894, "yes": 0.01710680399787532}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8982944851259372, "res": {"Yes": 0.8982944851259372, "yes": 0.07537892894397975}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9878531933515183, "res": {"Yes": 0.9878531933515183, "yes": 0.010307061872359262}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9027572029126159, "res": {"Yes": 0.9027572029126159, "yes": 0.09572997606978854}, "ground_truth": 1}, {"key": "36884862", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5925839667380678, "res": {"Yes": 0.5925839667380678, "yes": 0.3297679017510244}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8088182639421388, "res": {"Yes": 0.8088182639421388, "yes": 0.1731154441170025}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8880640235469834, "res": {"Yes": 0.8880640235469834, "yes": 0.10463546827296508}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9441822840188115, "res": {"Yes": 0.9441822840188115, "yes": 0.0530091308542335}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9373332346505958, "res": {"Yes": 0.9373332346505958, "yes": 0.05795849082990716}, "ground_truth": 1}, {"key": "39054429", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8855728812068995, "res": {"Yes": 0.8855728812068995, "yes": 0.10769530382448138}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9100267484519426, "res": {"Yes": 0.9100267484519426, "yes": 0.08591390403946055}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8386592359702746, "res": {"Yes": 0.8386592359702746, "yes": 0.15234672048070466}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7904990374345146, "res": {"Yes": 0.7904990374345146, "yes": 0.19909232840907864}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8644329293828344, "res": {"Yes": 0.8644329293828344, "yes": 0.12834452876981672}, "ground_truth": 1}, {"key": "36753964", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7887074284676027, "res": {"Yes": 0.7887074284676027, "yes": 0.2032739707985616}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9130296167278418, "res": {"Yes": 0.9130296167278418, "yes": 0.0796981902874681}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8723770078997117, "res": {"Yes": 0.8723770078997117, "yes": 0.11703017165919885}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8210593600597174, "res": {"Yes": 0.8210593600597174, "yes": 0.17106414087037297}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8203404269989599, "res": {"Yes": 0.8203404269989599, "yes": 0.16931540528127936}, "ground_truth": 1}, {"key": "37612459", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8655702592652502, "res": {"Yes": 0.8655702592652502, "yes": 0.12288872596734138}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8928904339304543, "res": {"Yes": 0.8928904339304543, "yes": 0.10040486667908784}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8918000372672177, "res": {"Yes": 0.8918000372672177, "yes": 0.09876819885283658}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8295425539036682, "res": {"Yes": 0.8295425539036682, "yes": 0.1564003833166826}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.892782097178125, "res": {"Yes": 0.892782097178125, "yes": 0.07813364664451752}, "ground_truth": 1}, {"key": "36805789", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6800770353153788, "res": {"Yes": 0.6800770353153788, "yes": 0.2874677496980839}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9917044327007067, "res": {"Yes": 0.9917044327007067, "yes": 0.004309968859787027}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5955998083194427, "res": {"Yes": 0.5955998083194427, "yes": 0.32399188817504526}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7534994054251293, "res": {"Yes": 0.7534994054251293, "yes": 0.2006226898360582}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6074752896912804, "res": {"Yes": 0.6074752896912804, "yes": 0.3650029292826249}, "ground_truth": 1}, {"key": "12757394", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.46927465669242624, "res": {"yes": 0.4895322184874151, "Yes": 0.46927465669242624}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5664792373137181, "res": {"Yes": 0.5664792373137181, "yes": 0.3830443270789036}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9092658348312311, "res": {"Yes": 0.9092658348312311, "yes": 0.08488996419614417}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7883459441851208, "res": {"Yes": 0.7883459441851208, "yes": 0.19605375444124537}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8575391628695773, "res": {"Yes": 0.8575391628695773, "yes": 0.13451087879189477}, "ground_truth": 1}, {"key": "32192542", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.920648081983228, "res": {"Yes": 0.920648081983228, "yes": 0.07173704486629376}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8874905283244452, "res": {"Yes": 0.8874905283244452, "yes": 0.1039002993235468}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7847240983883094, "res": {"Yes": 0.7847240983883094, "yes": 0.20495248647499845}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6955432945662416, "res": {"Yes": 0.6955432945662416, "yes": 0.29080210345786983}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.753590245253714, "res": {"Yes": 0.753590245253714, "yes": 0.23521840816797226}, "ground_truth": 1}, {"key": "34856060", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7938648470856108, "res": {"Yes": 0.7938648470856108, "yes": 0.19069842985432212}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6472513723983097, "res": {"Yes": 0.6472513723983097, "yes": 0.3395751165671468}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.672571603209521, "res": {"Yes": 0.672571603209521, "yes": 0.30967649218554977}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7951155889428119, "res": {"Yes": 0.7951155889428119, "yes": 0.18736715337886228}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5722767076162484, "res": {"Yes": 0.5722767076162484, "yes": 0.4245238037538175}, "ground_truth": 1}, {"key": "36083416", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.744756991290583, "res": {"Yes": 0.744756991290583, "yes": 0.2460991193536754}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5009986362283896, "res": {"Yes": 0.5009986362283896, "yes": 0.4938432400437547}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.781892786107898, "res": {"Yes": 0.781892786107898, "yes": 0.2143788289993467}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7224230282246827, "res": {"Yes": 0.7224230282246827, "yes": 0.27055878081389684}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9051670198404537, "res": {"Yes": 0.9051670198404537, "yes": 0.08860849904109293}, "ground_truth": 1}, {"key": "33839050", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8467055021571952, "res": {"Yes": 0.8467055021571952, "yes": 0.14789997179035283}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8199161254434382, "res": {"Yes": 0.8199161254434382, "yes": 0.1756500351774819}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9775038602749797, "res": {"Yes": 0.9775038602749797, "yes": 0.015461240495586854}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6834575032173725, "res": {"Yes": 0.6834575032173725, "yes": 0.31001890484232997}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7862902106138826, "res": {"Yes": 0.7862902106138826, "yes": 0.20903957309263074}, "ground_truth": 1}, {"key": "18464690", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.956712745527411, "res": {"Yes": 0.956712745527411, "yes": 0.034371845310233136}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6977648319263584, "res": {"Yes": 0.6977648319263584, "yes": 0.2982492000957989}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9786261193069559, "res": {"Yes": 0.9786261193069559, "yes": 0.0170723514208819}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8423857198930189, "res": {"Yes": 0.8423857198930189, "yes": 0.15141304185167992}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8636572329633093, "res": {"Yes": 0.8636572329633093, "yes": 0.13230512114915405}, "ground_truth": 1}, {"key": "39212665", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8157527845744803, "res": {"Yes": 0.8157527845744803, "yes": 0.17435376702246888}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6950570364153243, "res": {"Yes": 0.6950570364153243, "yes": 0.2889564194475999}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.78829194433746, "res": {"Yes": 0.78829194433746, "yes": 0.17523805765699008}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7985346407410405, "res": {"Yes": 0.7985346407410405, "yes": 0.15319602017240386}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.844205416095376, "res": {"Yes": 0.844205416095376, "yes": 0.15107085708697318}, "ground_truth": 1}, {"key": "40094011", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6538950724994734, "res": {"Yes": 0.6538950724994734, "yes": 0.3000097514562622}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7580363267619851, "res": {"Yes": 0.7580363267619851, "yes": 0.17045816140926162}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.885070850603072, "res": {"Yes": 0.885070850603072, "yes": 0.10473150489682252}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.810936334835967, "res": {"Yes": 0.810936334835967, "yes": 0.18102218249583168}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8828934798734399, "res": {"Yes": 0.8828934798734399, "yes": 0.1040955137365745}, "ground_truth": 1}, {"key": "36036272", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9850666343616731, "res": {"Yes": 0.9850666343616731, "yes": 0.00717827267052958}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8951567252736564, "res": {"Yes": 0.8951567252736564, "yes": 0.09744047502316028}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8289589769940758, "res": {"Yes": 0.8289589769940758, "yes": 0.1670258313139876}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9231867643872466, "res": {"Yes": 0.9231867643872466, "yes": 0.07239162232709297}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.931403998912882, "res": {"Yes": 0.931403998912882, "yes": 0.06480088618648881}, "ground_truth": 1}, {"key": "30681904", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9552286768180533, "res": {"Yes": 0.9552286768180533, "yes": 0.041054636575415505}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8515668444193757, "res": {"Yes": 0.8515668444193757, "yes": 0.1456707559595265}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8888948050475843, "res": {"Yes": 0.8888948050475843, "yes": 0.1055425067107954}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8676507908161563, "res": {"Yes": 0.8676507908161563, "yes": 0.12363309634758674}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8810555691059265, "res": {"Yes": 0.8810555691059265, "yes": 0.10639823399539276}, "ground_truth": 1}, {"key": "27834240", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.911781190856715, "res": {"Yes": 0.911781190856715, "yes": 0.08003074545116448}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9213359708957969, "res": {"Yes": 0.9213359708957969, "yes": 0.0722435159972158}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8343590363841837, "res": {"Yes": 0.8343590363841837, "yes": 0.16110821450497456}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8646751168997532, "res": {"Yes": 0.8646751168997532, "yes": 0.13085365730939347}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9023911955412319, "res": {"Yes": 0.9023911955412319, "yes": 0.0929983246971756}, "ground_truth": 1}, {"key": "35025075", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8468163600981401, "res": {"Yes": 0.8468163600981401, "yes": 0.14852545816176443}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8674181527735938, "res": {"Yes": 0.8674181527735938, "yes": 0.12700167546154206}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8647241280743426, "res": {"Yes": 0.8647241280743426, "yes": 0.1289737736923673}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9500981556386286, "res": {"Yes": 0.9500981556386286, "yes": 0.0458548640388073}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8355548782331658, "res": {"Yes": 0.8355548782331658, "yes": 0.15575325852883123}, "ground_truth": 1}, {"key": "33316985", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8958550801434558, "res": {"Yes": 0.8958550801434558, "yes": 0.09959319898561017}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9068765601380211, "res": {"Yes": 0.9068765601380211, "yes": 0.0878550308171042}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6664809512779022, "res": {"Yes": 0.6664809512779022, "yes": 0.28285083644326897}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4719764257919173, "res": {"Yes": 0.4719764257919173, "yes": 0.46586110836561423}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5092503686335902, "res": {"Yes": 0.5092503686335902, "yes": 0.4291128676080462}, "ground_truth": 1}, {"key": "17037056", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4740280887862911, "res": {"yes": 0.48636414796517236, "Yes": 0.4740280887862911}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.35272188280056055, "res": {"yes": 0.601596454388507, "Yes": 0.35272188280056055}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8806965592621772, "res": {"Yes": 0.8806965592621772, "yes": 0.10895098247102455}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.990031878368203, "res": {"Yes": 0.990031878368203, "yes": 0.008375308216316026}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9729803132917997, "res": {"Yes": 0.9729803132917997, "yes": 0.021630972052782763}, "ground_truth": 1}, {"key": "34050457", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9753217094483793, "res": {"Yes": 0.9753217094483793, "yes": 0.02040080944995074}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9436987825851769, "res": {"Yes": 0.9436987825851769, "yes": 0.04622329440796304}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6005711439250272, "res": {"Yes": 0.6005711439250272, "yes": 0.39265504525557976}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8316343067009913, "res": {"Yes": 0.8316343067009913, "yes": 0.15642303173189256}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8427880635520403, "res": {"Yes": 0.8427880635520403, "yes": 0.1526938412380802}, "ground_truth": 1}, {"key": "34713745", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8603450837874248, "res": {"Yes": 0.8603450837874248, "yes": 0.1346681735532291}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7439332767190014, "res": {"Yes": 0.7439332767190014, "yes": 0.24968955818792443}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.884029266492983, "res": {"Yes": 0.884029266492983, "yes": 0.09605125728040398}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8599744581510437, "res": {"Yes": 0.8599744581510437, "yes": 0.12631410213495922}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7845077327048093, "res": {"Yes": 0.7845077327048093, "yes": 0.199972664355084}, "ground_truth": 1}, {"key": "40856210", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7836132373974571, "res": {"Yes": 0.7836132373974571, "yes": 0.19442863469291383}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7933184989199111, "res": {"Yes": 0.7933184989199111, "yes": 0.18449208797968966}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9610360958492586, "res": {"Yes": 0.9610360958492586, "yes": 0.03361338931069008}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9766974913773085, "res": {"Yes": 0.9766974913773085, "yes": 0.016351461455108247}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9809687501705987, "res": {"Yes": 0.9809687501705987, "yes": 0.015549226256800457}, "ground_truth": 1}, {"key": "40848302", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7788697177793751, "res": {"Yes": 0.7788697177793751, "yes": 0.21740534903954348}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9762149882301584, "res": {"Yes": 0.9762149882301584, "yes": 0.018924087452041334}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9095867568168765, "res": {"Yes": 0.9095867568168765, "yes": 0.08307593582366926}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9498720154900183, "res": {"Yes": 0.9498720154900183, "yes": 0.04282501005411009}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9566914722993181, "res": {"Yes": 0.9566914722993181, "yes": 0.03937305814519063}, "ground_truth": 1}, {"key": "40636168", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9414649381430369, "res": {"Yes": 0.9414649381430369, "yes": 0.052415978548836735}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.93222822748391, "res": {"Yes": 0.93222822748391, "yes": 0.059848584383384446}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4842046166163427, "res": {"yes": 0.5107610758791016, "Yes": 0.4842046166163427}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5993870675448, "res": {"Yes": 0.5993870675448, "yes": 0.39676579873117396}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.529893073978528, "res": {"Yes": 0.529893073978528, "yes": 0.46605650352843675}, "ground_truth": 1}, {"key": "34423311", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6152296784502905, "res": {"Yes": 0.6152296784502905, "yes": 0.3743404505676446}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7578277969151268, "res": {"Yes": 0.7578277969151268, "yes": 0.238818841457591}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9098721807964554, "res": {"Yes": 0.9098721807964554, "yes": 0.07905079171553679}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6371049374547436, "res": {"Yes": 0.6371049374547436, "yes": 0.329824131971763}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6853018848451051, "res": {"Yes": 0.6853018848451051, "yes": 0.1355056963584855}, "ground_truth": 1}, {"key": "34833945", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9275956343543387, "res": {"Yes": 0.9275956343543387, "yes": 0.05269992745506497}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7624434249806299, "res": {"Yes": 0.7624434249806299, "yes": 0.19814345310250653}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7632215445609273, "res": {"Yes": 0.7632215445609273, "yes": 0.23044646488162435}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.866860703540007, "res": {"Yes": 0.866860703540007, "yes": 0.12602676171748262}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8565565233431864, "res": {"Yes": 0.8565565233431864, "yes": 0.13854437604726033}, "ground_truth": 1}, {"key": "21272328", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8650649657129715, "res": {"Yes": 0.8650649657129715, "yes": 0.12935356627394676}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8190256393020251, "res": {"Yes": 0.8190256393020251, "yes": 0.17339343080226863}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7307107415041048, "res": {"Yes": 0.7307107415041048, "yes": 0.26504975921499213}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8711928451803622, "res": {"Yes": 0.8711928451803622, "yes": 0.1227009113288331}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7680548185972074, "res": {"Yes": 0.7680548185972074, "yes": 0.22332322890449033}, "ground_truth": 1}, {"key": "38648957", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8294333849921667, "res": {"Yes": 0.8294333849921667, "yes": 0.16198366211136292}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8941784814988945, "res": {"Yes": 0.8941784814988945, "yes": 0.1016454603013507}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8265432863609353, "res": {"Yes": 0.8265432863609353, "yes": 0.12973951103644976}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7450971994891343, "res": {"Yes": 0.7450971994891343, "yes": 0.22448450370390827}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.719626752998678, "res": {"Yes": 0.719626752998678, "yes": 0.19338497262308466}, "ground_truth": 1}, {"key": "24942981", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7983411552861618, "res": {"Yes": 0.7983411552861618, "yes": 0.1781025626435059}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7219764776876619, "res": {"Yes": 0.7219764776876619, "yes": 0.254613237725989}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9291402024087874, "res": {"Yes": 0.9291402024087874, "yes": 0.06716498659837816}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9184603899234215, "res": {"Yes": 0.9184603899234215, "yes": 0.07800274192701077}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9297358225148716, "res": {"Yes": 0.9297358225148716, "yes": 0.06629606731933917}, "ground_truth": 1}, {"key": "35882366", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9274286784565878, "res": {"Yes": 0.9274286784565878, "yes": 0.0683990734840636}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8798082614201896, "res": {"Yes": 0.8798082614201896, "yes": 0.11471902512859061}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8242167314936344, "res": {"Yes": 0.8242167314936344, "yes": 0.17068423644322325}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8169280615151333, "res": {"Yes": 0.8169280615151333, "yes": 0.17897210916543438}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8277771113712012, "res": {"Yes": 0.8277771113712012, "yes": 0.167315671890509}, "ground_truth": 1}, {"key": "40559523", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.859708380225999, "res": {"Yes": 0.859708380225999, "yes": 0.13739928541109844}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.856192915206592, "res": {"Yes": 0.856192915206592, "yes": 0.14111435343941178}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7220829915139171, "res": {"Yes": 0.7220829915139171, "yes": 0.26332504430694487}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8274073183479882, "res": {"Yes": 0.8274073183479882, "yes": 0.16905408257776092}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5163203875904611, "res": {"Yes": 0.5163203875904611, "yes": 0.480073529669809}, "ground_truth": 1}, {"key": "24632722", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9707709357797809, "res": {"Yes": 0.9707709357797809, "yes": 0.019447810226725693}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7128454373341798, "res": {"Yes": 0.7128454373341798, "yes": 0.28311958747886884}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.990739421731133, "res": {"Yes": 0.990739421731133, "yes": 0.004707358765034405}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7519571786035306, "res": {"Yes": 0.7519571786035306, "yes": 0.23605531873612265}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.841156860293418, "res": {"Yes": 0.841156860293418, "yes": 0.1505988349051282}, "ground_truth": 1}, {"key": "36002759", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8383641972407136, "res": {"Yes": 0.8383641972407136, "yes": 0.15484045728541854}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7866170686063361, "res": {"Yes": 0.7866170686063361, "yes": 0.2066557734733907}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6928300832303823, "res": {"Yes": 0.6928300832303823, "yes": 0.30324970619879904}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7320363264165864, "res": {"Yes": 0.7320363264165864, "yes": 0.2636414085789269}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.974101581165495, "res": {"Yes": 0.974101581165495, "yes": 0.018950988220014002}, "ground_truth": 1}, {"key": "29508534", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8368670685854858, "res": {"Yes": 0.8368670685854858, "yes": 0.15878514180600353}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.65180212655225, "res": {"Yes": 0.65180212655225, "yes": 0.34377018903081125}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7351128383411081, "res": {"Yes": 0.7351128383411081, "yes": 0.2588119093067847}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9502172883139695, "res": {"Yes": 0.9502172883139695, "yes": 0.04209442373149821}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7461594214005471, "res": {"Yes": 0.7461594214005471, "yes": 0.24760307971046489}, "ground_truth": 1}, {"key": "15631612", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5602825606038334, "res": {"Yes": 0.5602825606038334, "yes": 0.43396807825269007}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6675860921638896, "res": {"Yes": 0.6675860921638896, "yes": 0.3276706142556219}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6842534835675953, "res": {"Yes": 0.6842534835675953, "yes": 0.3110188057442587}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6846096069811979, "res": {"Yes": 0.6846096069811979, "yes": 0.3068445841904108}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8890944287879744, "res": {"Yes": 0.8890944287879744, "yes": 0.106287796783836}, "ground_truth": 1}, {"key": "40731892", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8281728916941478, "res": {"Yes": 0.8281728916941478, "yes": 0.16470208211738074}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7321924131996382, "res": {"Yes": 0.7321924131996382, "yes": 0.2570386798908122}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.46694570101025173, "res": {"yes": 0.5298272335559057, "Yes": 0.46694570101025173}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5868068613242842, "res": {"Yes": 0.5868068613242842, "yes": 0.4091000916575497}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9049267063684928, "res": {"Yes": 0.9049267063684928, "yes": 0.08327332576482722}, "ground_truth": 1}, {"key": "35971910", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47115846396980804, "res": {"yes": 0.5209234171852701, "Yes": 0.47115846396980804}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.4944629276958398, "res": {"yes": 0.49938342076275766, "Yes": 0.4944629276958398}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9123854277853751, "res": {"Yes": 0.9123854277853751, "yes": 0.07133282348931422}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.883403471609554, "res": {"Yes": 0.883403471609554, "yes": 0.10706233971661551}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7500426180496552, "res": {"Yes": 0.7500426180496552, "yes": 0.2139893353532452}, "ground_truth": 1}, {"key": "34428424", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8231049295635743, "res": {"Yes": 0.8231049295635743, "yes": 0.13851612972879432}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9231580075674257, "res": {"Yes": 0.9231580075674257, "yes": 0.0599750102111272}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.773410232001918, "res": {"Yes": 0.773410232001918, "yes": 0.20994669790128173}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5964147647157603, "res": {"Yes": 0.5964147647157603, "yes": 0.3303885806929297}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6819685953996549, "res": {"Yes": 0.6819685953996549, "yes": 0.309204904652434}, "ground_truth": 1}, {"key": "36971005", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7385439546204147, "res": {"Yes": 0.7385439546204147, "yes": 0.23327849072103732}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8476263460110319, "res": {"Yes": 0.8476263460110319, "yes": 0.13941602922491422}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8847342425274429, "res": {"Yes": 0.8847342425274429, "yes": 0.10319111753873868}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9534875034552663, "res": {"Yes": 0.9534875034552663, "yes": 0.03749756805407801}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9515441527613908, "res": {"Yes": 0.9515441527613908, "yes": 0.03855595176880355}, "ground_truth": 1}, {"key": "34649067", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9426388835162133, "res": {"Yes": 0.9426388835162133, "yes": 0.04565046891652226}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9513940347254369, "res": {"Yes": 0.9513940347254369, "yes": 0.0415705206973413}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9489708816839566, "res": {"Yes": 0.9489708816839566, "yes": 0.025561269072028964}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9204879583822012, "res": {"Yes": 0.9204879583822012, "yes": 0.07510256247164059}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8880920370803693, "res": {"Yes": 0.8880920370803693, "yes": 0.10670593489101142}, "ground_truth": 1}, {"key": "37355154", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9445007745286386, "res": {"Yes": 0.9445007745286386, "yes": 0.05114933436812833}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9452842245406041, "res": {"Yes": 0.9452842245406041, "yes": 0.04945269613266555}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7933852753482118, "res": {"Yes": 0.7933852753482118, "yes": 0.1877194533706532}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8256272074810935, "res": {"Yes": 0.8256272074810935, "yes": 0.16098420892807974}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8387798186765101, "res": {"Yes": 0.8387798186765101, "yes": 0.14891453920039174}, "ground_truth": 1}, {"key": "38674697", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8728837181583742, "res": {"Yes": 0.8728837181583742, "yes": 0.12052971005119734}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9705888731160766, "res": {"Yes": 0.9705888731160766, "yes": 0.021147823216336622}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9321859472314701, "res": {"Yes": 0.9321859472314701, "yes": 0.06479447121630261}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8872462890451807, "res": {"Yes": 0.8872462890451807, "yes": 0.10167366066324764}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9458684204855967, "res": {"Yes": 0.9458684204855967, "yes": 0.05016471940810253}, "ground_truth": 1}, {"key": "40525767", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9457374740677554, "res": {"Yes": 0.9457374740677554, "yes": 0.0469858258695151}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8340603142447425, "res": {"Yes": 0.8340603142447425, "yes": 0.15149323628694344}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8806702180220248, "res": {"Yes": 0.8806702180220248, "yes": 0.11013705876685162}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9100427523230625, "res": {"Yes": 0.9100427523230625, "yes": 0.07520737357305723}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9251009835713426, "res": {"Yes": 0.9251009835713426, "yes": 0.06510322446772455}, "ground_truth": 1}, {"key": "27165110", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8242203745396387, "res": {"Yes": 0.8242203745396387, "yes": 0.16288303702689574}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8917036344350968, "res": {"Yes": 0.8917036344350968, "yes": 0.10013496028894522}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8475557755793508, "res": {"Yes": 0.8475557755793508, "yes": 0.1488493735691225}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.87237573423021, "res": {"Yes": 0.87237573423021, "yes": 0.11979783083707266}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8874113898852195, "res": {"Yes": 0.8874113898852195, "yes": 0.10417170829635578}, "ground_truth": 1}, {"key": "35497491", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9079446049556386, "res": {"Yes": 0.9079446049556386, "yes": 0.08240477456461974}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8522556827913714, "res": {"Yes": 0.8522556827913714, "yes": 0.14202959892050124}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9930544237867655, "res": {"Yes": 0.9930544237867655, "yes": 0.00452427923319181}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7124621436287469, "res": {"Yes": 0.7124621436287469, "yes": 0.09107840955548976}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9927260697228378, "res": {"Yes": 0.9927260697228378, "yes": 0.006217098252347793}, "ground_truth": 1}, {"key": "40690716", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7133183246970635, "res": {"Yes": 0.7133183246970635, "yes": 0.2156008925744844}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8427469112156395, "res": {"Yes": 0.8427469112156395, "yes": 0.12284477653088818}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.93897502271987, "res": {"Yes": 0.93897502271987, "yes": 0.05718348191559968}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9482947019503752, "res": {"Yes": 0.9482947019503752, "yes": 0.047986824276224985}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9585752801879182, "res": {"Yes": 0.9585752801879182, "yes": 0.03783999753277379}, "ground_truth": 1}, {"key": "34835193", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9548104374699213, "res": {"Yes": 0.9548104374699213, "yes": 0.040857413218642086}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9468492598222905, "res": {"Yes": 0.9468492598222905, "yes": 0.0491667252669949}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6531797627965195, "res": {"Yes": 0.6531797627965195, "yes": 0.3325798158579443}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8210058615748154, "res": {"Yes": 0.8210058615748154, "yes": 0.16990321968719913}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.626381593071444, "res": {"Yes": 0.626381593071444, "yes": 0.3698338234654898}, "ground_truth": 1}, {"key": "39471712", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8338178214443033, "res": {"Yes": 0.8338178214443033, "yes": 0.1592808631766874}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.39713636841882205, "res": {"yes": 0.59663586085516, "Yes": 0.39713636841882205}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5600360064991586, "res": {"Yes": 0.5600360064991586, "yes": 0.43144980585954684}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6463981976940352, "res": {"Yes": 0.6463981976940352, "yes": 0.345985746451444}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4012917941848262, "res": {"yes": 0.5939929074563223, "Yes": 0.4012917941848262}, "ground_truth": 1}, {"key": "39115192", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5735556335819424, "res": {"Yes": 0.5735556335819424, "yes": 0.42113408681972275}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5246050422430281, "res": {"Yes": 0.5246050422430281, "yes": 0.4688902248904168}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9723973322633195, "res": {"Yes": 0.9723973322633195, "yes": 0.021185368514238197}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8589322249950412, "res": {"Yes": 0.8589322249950412, "yes": 0.1380320464681927}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6100591861781447, "res": {"Yes": 0.6100591861781447, "yes": 0.38788855887832946}, "ground_truth": 1}, {"key": "23520673", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7125293034755992, "res": {"Yes": 0.7125293034755992, "yes": 0.2838048854479576}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7014522010254374, "res": {"Yes": 0.7014522010254374, "yes": 0.28935287932526443}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8306898074874287, "res": {"Yes": 0.8306898074874287, "yes": 0.16278447375776614}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8364349132587532, "res": {"Yes": 0.8364349132587532, "yes": 0.1579021249883721}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.905513783316827, "res": {"Yes": 0.905513783316827, "yes": 0.08977916914635788}, "ground_truth": 1}, {"key": "35764233", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9183765291339402, "res": {"Yes": 0.9183765291339402, "yes": 0.07872204881212763}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7327255627488553, "res": {"Yes": 0.7327255627488553, "yes": 0.2583330848934852}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8552893125761596, "res": {"Yes": 0.8552893125761596, "yes": 0.14191734050384852}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7551101849879372, "res": {"Yes": 0.7551101849879372, "yes": 0.23984341645587642}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.867042139132802, "res": {"Yes": 0.867042139132802, "yes": 0.12844118290712736}, "ground_truth": 1}, {"key": "35228910", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.813035251984355, "res": {"Yes": 0.813035251984355, "yes": 0.18107441492964804}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.828969305887278, "res": {"Yes": 0.828969305887278, "yes": 0.16729191475189734}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9327313609318653, "res": {"Yes": 0.9327313609318653, "yes": 0.06430085826531758}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9901225585505011, "res": {"Yes": 0.9901225585505011, "yes": 0.008433031188577448}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996497778633861, "res": {"Yes": 0.996497778633861, "yes": 0.002085056553746883}, "ground_truth": 1}, {"key": "36795599", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965259512200949, "res": {"Yes": 0.9965259512200949, "yes": 0.00202295897558735}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9926735567961585, "res": {"Yes": 0.9926735567961585, "yes": 0.005056393989677668}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9606838983864183, "res": {"Yes": 0.9606838983864183, "yes": 0.034227444019475944}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9257712361972448, "res": {"Yes": 0.9257712361972448, "yes": 0.06659864812934443}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5116590979594587, "res": {"Yes": 0.5116590979594587, "yes": 0.4798655108829773}, "ground_truth": 1}, {"key": "38641949", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7087596205953938, "res": {"Yes": 0.7087596205953938, "yes": 0.28462362088086335}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.449273071070705, "res": {"yes": 0.5406867814492523, "Yes": 0.449273071070705}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.87440018547758, "res": {"Yes": 0.87440018547758, "yes": 0.04845571456125067}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.915493330485556, "res": {"Yes": 0.915493330485556, "yes": 0.06402102717331588}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9046044885806075, "res": {"Yes": 0.9046044885806075, "yes": 0.08374753847936986}, "ground_truth": 1}, {"key": "29968443", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8840947666477608, "res": {"Yes": 0.8840947666477608, "yes": 0.07535731336698051}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9255296747531531, "res": {"Yes": 0.9255296747531531, "yes": 0.06576211278662171}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8026016221760841, "res": {"Yes": 0.8026016221760841, "yes": 0.1871505698036899}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7747209879487085, "res": {"Yes": 0.7747209879487085, "yes": 0.22142551974852115}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9824196310918274, "res": {"Yes": 0.9824196310918274, "yes": 0.015704273597596574}, "ground_truth": 1}, {"key": "21268042", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9820418229229572, "res": {"Yes": 0.9820418229229572, "yes": 0.016188755219682466}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8496339918968733, "res": {"Yes": 0.8496339918968733, "yes": 0.14686079700531332}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.27720582449511394, "res": {"yes": 0.6022740528319416, "Yes": 0.27720582449511394}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8222496414275183, "res": {"Yes": 0.8222496414275183, "yes": 0.1718999942573941}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6861033682689067, "res": {"Yes": 0.6861033682689067, "yes": 0.30816935291268294}, "ground_truth": 1}, {"key": "26808572", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5847568495196779, "res": {"Yes": 0.5847568495196779, "yes": 0.40721238466563064}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7408997151681075, "res": {"Yes": 0.7408997151681075, "yes": 0.2523932984436952}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8323033843301827, "res": {"Yes": 0.8323033843301827, "yes": 0.16256027872431772}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7328583812934204, "res": {"Yes": 0.7328583812934204, "yes": 0.26217362601302824}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.538703344957951, "res": {"Yes": 0.538703344957951, "yes": 0.4569871530367623}, "ground_truth": 1}, {"key": "37829390", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9731707441721648, "res": {"Yes": 0.9731707441721648, "yes": 0.023479476026688734}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6344322655553394, "res": {"Yes": 0.6344322655553394, "yes": 0.36091197028913213}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9583087374669811, "res": {"Yes": 0.9583087374669811, "yes": 0.027557154103925213}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9110360359756147, "res": {"Yes": 0.9110360359756147, "yes": 0.08534546203909638}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7858399289722657, "res": {"Yes": 0.7858399289722657, "yes": 0.2077432576343884}, "ground_truth": 1}, {"key": "35716045", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.610423768835622, "res": {"Yes": 0.610423768835622, "yes": 0.38204552579607387}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.747689256358143, "res": {"Yes": 0.747689256358143, "yes": 0.24740309722586273}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.986170332781099, "res": {"Yes": 0.986170332781099, "yes": 0.009296242432360689}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.981189289865817, "res": {"Yes": 0.981189289865817, "yes": 0.01471358591667066}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.904570638913976, "res": {"Yes": 0.904570638913976, "yes": 0.08675620806558328}, "ground_truth": 1}, {"key": "34367070", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9740761399126369, "res": {"Yes": 0.9740761399126369, "yes": 0.011242866953537009}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9294213451533812, "res": {"Yes": 0.9294213451533812, "yes": 0.06413894071850379}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8692080406585909, "res": {"Yes": 0.8692080406585909, "yes": 0.12163469857880939}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8473621142621003, "res": {"Yes": 0.8473621142621003, "yes": 0.14697349714330757}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8264230247971792, "res": {"Yes": 0.8264230247971792, "yes": 0.1677050160076681}, "ground_truth": 1}, {"key": "35239748", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8506515021250232, "res": {"Yes": 0.8506515021250232, "yes": 0.1403902407688146}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8224314280253847, "res": {"Yes": 0.8224314280253847, "yes": 0.17082365703298927}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5885585011503683, "res": {"Yes": 0.5885585011503683, "yes": 0.37673705564828247}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7151965778640371, "res": {"Yes": 0.7151965778640371, "yes": 0.27616708941840246}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8095114349680342, "res": {"Yes": 0.8095114349680342, "yes": 0.1762207514994674}, "ground_truth": 1}, {"key": "40421370", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7970374042406491, "res": {"Yes": 0.7970374042406491, "yes": 0.19190402533056933}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6105830430682975, "res": {"Yes": 0.6105830430682975, "yes": 0.38014639517161064}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9701509917507566, "res": {"Yes": 0.9701509917507566, "yes": 0.026514434398474177}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6676766229758374, "res": {"Yes": 0.6676766229758374, "yes": 0.3215147363881056}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7568853652279485, "res": {"Yes": 0.7568853652279485, "yes": 0.23615389240751705}, "ground_truth": 1}, {"key": "37288396", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.844449139605708, "res": {"Yes": 0.844449139605708, "yes": 0.1490566863627635}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7840527560621974, "res": {"Yes": 0.7840527560621974, "yes": 0.2108716340645759}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9829154173099631, "res": {"Yes": 0.9829154173099631, "yes": 0.014428867378865888}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9860187217339264, "res": {"Yes": 0.9860187217339264, "yes": 0.012006621376434006}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9879407921536023, "res": {"Yes": 0.9879407921536023, "yes": 0.008852570589923591}, "ground_truth": 1}, {"key": "38903688", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9861272055369843, "res": {"Yes": 0.9861272055369843, "yes": 0.01133520737597865}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9860421251660259, "res": {"Yes": 0.9860421251660259, "yes": 0.011312210063843852}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.979045530874874, "res": {"Yes": 0.979045530874874, "yes": 0.013360575973687073}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.986417801851643, "res": {"Yes": 0.986417801851643, "yes": 0.01101574132043174}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9564077807262656, "res": {"Yes": 0.9564077807262656, "yes": 0.036362036568738404}, "ground_truth": 1}, {"key": "28071228", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9880923133838214, "res": {"Yes": 0.9880923133838214, "yes": 0.010039670339186561}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9815697726680822, "res": {"Yes": 0.9815697726680822, "yes": 0.010056175053296416}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8615315857657038, "res": {"Yes": 0.8615315857657038, "yes": 0.13018077728599778}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7859874921301253, "res": {"Yes": 0.7859874921301253, "yes": 0.20517028215080335}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.865355797527911, "res": {"Yes": 0.865355797527911, "yes": 0.1288241477955369}, "ground_truth": 1}, {"key": "36855834", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8056244161546586, "res": {"Yes": 0.8056244161546586, "yes": 0.18516747373040787}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8751806121843275, "res": {"Yes": 0.8751806121843275, "yes": 0.1188333722864187}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8844724795487882, "res": {"Yes": 0.8844724795487882, "yes": 0.1008614485953652}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.910843341260241, "res": {"Yes": 0.910843341260241, "yes": 0.08462207606633275}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7900358221937913, "res": {"Yes": 0.7900358221937913, "yes": 0.18637742398986012}, "ground_truth": 1}, {"key": "40548717", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9085943627512872, "res": {"Yes": 0.9085943627512872, "yes": 0.08631279758984033}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9377389440115617, "res": {"Yes": 0.9377389440115617, "yes": 0.05611183646654536}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9331728618512998, "res": {"Yes": 0.9331728618512998, "yes": 0.05759988544985869}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5442257971242948, "res": {"Yes": 0.5442257971242948, "yes": 0.4480536048688394}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9721905292342046, "res": {"Yes": 0.9721905292342046, "yes": 0.02404162201705331}, "ground_truth": 1}, {"key": "37051175", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9701352851333456, "res": {"Yes": 0.9701352851333456, "yes": 0.022206372094626965}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6654659080715617, "res": {"Yes": 0.6654659080715617, "yes": 0.32154582836638734}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.29868555932721275, "res": {"yes": 0.6607495345002946, "Yes": 0.29868555932721275}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6852448632476836, "res": {"Yes": 0.6852448632476836, "yes": 0.3110224135833307}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8179803783616801, "res": {"Yes": 0.8179803783616801, "yes": 0.17790104682189076}, "ground_truth": 1}, {"key": "38882119", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6486332903199525, "res": {"Yes": 0.6486332903199525, "yes": 0.34734567963312063}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5643989045026262, "res": {"Yes": 0.5643989045026262, "yes": 0.43328351335103577}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4821891570038614, "res": {"yes": 0.5111110466299355, "Yes": 0.4821891570038614}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7209994244239023, "res": {"Yes": 0.7209994244239023, "yes": 0.2726172724067732}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9595559822543254, "res": {"Yes": 0.9595559822543254, "yes": 0.028647459883893094}, "ground_truth": 1}, {"key": "19485402", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9574913298071789, "res": {"Yes": 0.9574913298071789, "yes": 0.03303019948791127}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6764794153458636, "res": {"Yes": 0.6764794153458636, "yes": 0.3108703918900609}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9255946214490587, "res": {"Yes": 0.9255946214490587, "yes": 0.06717497477448783}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9024548750226902, "res": {"Yes": 0.9024548750226902, "yes": 0.07937427148127998}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.892818496646225, "res": {"Yes": 0.892818496646225, "yes": 0.07797092329974589}, "ground_truth": 1}, {"key": "36060907", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8929870097178191, "res": {"Yes": 0.8929870097178191, "yes": 0.09948019498566}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9246736209032017, "res": {"Yes": 0.9246736209032017, "yes": 0.06632218648408805}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9100542971986475, "res": {"Yes": 0.9100542971986475, "yes": 0.07677071704607533}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9156651151211513, "res": {"Yes": 0.9156651151211513, "yes": 0.07851539961451139}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9483502433009361, "res": {"Yes": 0.9483502433009361, "yes": 0.042734261646610726}, "ground_truth": 1}, {"key": "24037309", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8779398988718083, "res": {"Yes": 0.8779398988718083, "yes": 0.10730346899484533}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9019667248591828, "res": {"Yes": 0.9019667248591828, "yes": 0.08306863376984032}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.446131628205013, "res": {"yes": 0.5322008091780105, "Yes": 0.446131628205013}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5828696514782369, "res": {"Yes": 0.5828696514782369, "yes": 0.3888781980295061}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8611861894699859, "res": {"Yes": 0.8611861894699859, "yes": 0.12589097806888708}, "ground_truth": 1}, {"key": "35605805", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7604897236479923, "res": {"Yes": 0.7604897236479923, "yes": 0.20385995825801195}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6000749229370771, "res": {"Yes": 0.6000749229370771, "yes": 0.3790454993688966}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8188348858768226, "res": {"Yes": 0.8188348858768226, "yes": 0.17252641910599212}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9289265098732834, "res": {"Yes": 0.9289265098732834, "yes": 0.07016228942257434}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8524904774785408, "res": {"Yes": 0.8524904774785408, "yes": 0.1362405098965526}, "ground_truth": 1}, {"key": "17706248", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8672214099552328, "res": {"Yes": 0.8672214099552328, "yes": 0.12676958901631966}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9913735710119423, "res": {"Yes": 0.9913735710119423, "yes": 0.005976251781065961}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8082728272232286, "res": {"Yes": 0.8082728272232286, "yes": 0.18577230905720846}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6902175784570911, "res": {"Yes": 0.6902175784570911, "yes": 0.30357453918632715}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7899021515418423, "res": {"Yes": 0.7899021515418423, "yes": 0.20497640582583704}, "ground_truth": 1}, {"key": "36883559", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9792197068177161, "res": {"Yes": 0.9792197068177161, "yes": 0.01738139322177719}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7183906335576554, "res": {"Yes": 0.7183906335576554, "yes": 0.2756685612272307}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7856457954842825, "res": {"Yes": 0.7856457954842825, "yes": 0.21032883332273886}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8267964870785592, "res": {"Yes": 0.8267964870785592, "yes": 0.17057320833748138}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9879001965246547, "res": {"Yes": 0.9879001965246547, "yes": 0.010259932686818225}, "ground_truth": 1}, {"key": "32799471", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8016439759012901, "res": {"Yes": 0.8016439759012901, "yes": 0.18842265021407362}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8095670584096524, "res": {"Yes": 0.8095670584096524, "yes": 0.18560401988526729}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8573669434244316, "res": {"Yes": 0.8573669434244316, "yes": 0.13900979231826094}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8015738752007954, "res": {"Yes": 0.8015738752007954, "yes": 0.18850949537888156}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7545715003918219, "res": {"Yes": 0.7545715003918219, "yes": 0.23875159953994293}, "ground_truth": 1}, {"key": "34797243", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7551785104484722, "res": {"Yes": 0.7551785104484722, "yes": 0.23867153568091695}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9143350639052314, "res": {"Yes": 0.9143350639052314, "yes": 0.08138972031539952}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7362672019579833, "res": {"Yes": 0.7362672019579833, "yes": 0.25890160317192423}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8106298507227151, "res": {"Yes": 0.8106298507227151, "yes": 0.18079318957742185}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8931161226477751, "res": {"Yes": 0.8931161226477751, "yes": 0.0993553759723869}, "ground_truth": 1}, {"key": "32154876", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7865151453703351, "res": {"Yes": 0.7865151453703351, "yes": 0.20391576236503084}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8562735895035692, "res": {"Yes": 0.8562735895035692, "yes": 0.13596339280914016}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8119459541209115, "res": {"Yes": 0.8119459541209115, "yes": 0.17899441047957945}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7729366864681434, "res": {"Yes": 0.7729366864681434, "yes": 0.22171464650896958}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7545524929751265, "res": {"Yes": 0.7545524929751265, "yes": 0.2395872327128758}, "ground_truth": 1}, {"key": "37962274", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7722250159415707, "res": {"Yes": 0.7722250159415707, "yes": 0.22407456568452272}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7175806955549293, "res": {"Yes": 0.7175806955549293, "yes": 0.278118332328317}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9870913585284184, "res": {"Yes": 0.9870913585284184, "yes": 0.011238562883847229}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9773792638443496, "res": {"Yes": 0.9773792638443496, "yes": 0.019555433554304568}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9883046495671161, "res": {"Yes": 0.9883046495671161, "yes": 0.009802488470284594}, "ground_truth": 1}, {"key": "35574030", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7904314763690482, "res": {"Yes": 0.7904314763690482, "yes": 0.20610455302305808}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9787624631658361, "res": {"Yes": 0.9787624631658361, "yes": 0.018234658183050173}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8633230536471198, "res": {"Yes": 0.8633230536471198, "yes": 0.13503422691148756}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9605408920194086, "res": {"Yes": 0.9605408920194086, "yes": 0.03370540731649049}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9324876047927173, "res": {"Yes": 0.9324876047927173, "yes": 0.06112170124532727}, "ground_truth": 1}, {"key": "39105949", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9546453917462783, "res": {"Yes": 0.9546453917462783, "yes": 0.03863427666671553}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7525392080466344, "res": {"Yes": 0.7525392080466344, "yes": 0.23851857239701163}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9442277569325889, "res": {"Yes": 0.9442277569325889, "yes": 0.05203976713910179}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8152475251596921, "res": {"Yes": 0.8152475251596921, "yes": 0.1756433957316385}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6742299527973629, "res": {"Yes": 0.6742299527973629, "yes": 0.31486899989657885}, "ground_truth": 1}, {"key": "41064322", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8130391139109739, "res": {"Yes": 0.8130391139109739, "yes": 0.17900051429304623}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7826576712973285, "res": {"Yes": 0.7826576712973285, "yes": 0.21025567264866304}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6170377955399268, "res": {"Yes": 0.6170377955399268, "yes": 0.25882321963435023}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5466790798757599, "res": {"Yes": 0.5466790798757599, "yes": 0.38627147639452925}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3476726162153733, "res": {"yes": 0.469113047563597, "Yes": 0.3476726162153733}, "ground_truth": 1}, {"key": "28105101", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3607199801132574, "res": {"yes": 0.5019103002587272, "Yes": 0.3607199801132574}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.522107799483372, "res": {"Yes": 0.522107799483372, "yes": 0.42238979772286245}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7923349340781022, "res": {"Yes": 0.7923349340781022, "yes": 0.2039059134715613}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7049539341048346, "res": {"Yes": 0.7049539341048346, "yes": 0.2907843650705852}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5924044706158924, "res": {"Yes": 0.5924044706158924, "yes": 0.4020733276128943}, "ground_truth": 1}, {"key": "36036068", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.795959081189415, "res": {"Yes": 0.795959081189415, "yes": 0.19599349677920733}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9564975323305831, "res": {"Yes": 0.9564975323305831, "yes": 0.031246615895739276}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9564423440151658, "res": {"Yes": 0.9564423440151658, "yes": 0.040541619117160706}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7385338070961919, "res": {"Yes": 0.7385338070961919, "yes": 0.25606767857715657}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8000444822879932, "res": {"Yes": 0.8000444822879932, "yes": 0.19179453694879456}, "ground_truth": 1}, {"key": "37991460", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8896544322734002, "res": {"Yes": 0.8896544322734002, "yes": 0.10615107114731374}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7128553174404102, "res": {"Yes": 0.7128553174404102, "yes": 0.2743104458277751}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7049528414270836, "res": {"Yes": 0.7049528414270836, "yes": 0.21937800211520683}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4634775040989165, "res": {"yes": 0.4943399206221436, "Yes": 0.4634775040989165}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8386869037948474, "res": {"Yes": 0.8386869037948474, "yes": 0.11861996116044439}, "ground_truth": 1}, {"key": "38437830", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6587724829457842, "res": {"Yes": 0.6587724829457842, "yes": 0.27500433989602513}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.3561282358248781, "res": {"yes": 0.6305667568242307, "Yes": 0.3561282358248781}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9805760204703649, "res": {"Yes": 0.9805760204703649, "yes": 0.014075796737310513}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6898928384819785, "res": {"Yes": 0.6898928384819785, "yes": 0.3031092431506947}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9715680784646589, "res": {"Yes": 0.9715680784646589, "yes": 0.02222581117356655}, "ground_truth": 1}, {"key": "36507138", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9719948615566726, "res": {"Yes": 0.9719948615566726, "yes": 0.02331990246430037}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5976587141938708, "res": {"Yes": 0.5976587141938708, "yes": 0.3923842459628148}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5223644785405293, "res": {"Yes": 0.5223644785405293, "yes": 0.44546606848001535}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6315620145712684, "res": {"Yes": 0.6315620145712684, "yes": 0.36140253010665324}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5533745506208532, "res": {"Yes": 0.5533745506208532, "yes": 0.43712118985964216}, "ground_truth": 1}, {"key": "37824866", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5292373505295391, "res": {"Yes": 0.5292373505295391, "yes": 0.40855478543603907}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.690552808788972, "res": {"Yes": 0.690552808788972, "yes": 0.2756943650121599}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9234688872022232, "res": {"Yes": 0.9234688872022232, "yes": 0.07228112130699368}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9073272945409324, "res": {"Yes": 0.9073272945409324, "yes": 0.08427747079904023}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9229633063837717, "res": {"Yes": 0.9229633063837717, "yes": 0.06031570870328992}, "ground_truth": 1}, {"key": "25088134", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8726248330594986, "res": {"Yes": 0.8726248330594986, "yes": 0.12095841138843055}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8682802480629215, "res": {"Yes": 0.8682802480629215, "yes": 0.1231168468696472}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.36270291346955424, "res": {"yes": 0.6270524936022669, "Yes": 0.36270291346955424}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6925775580553636, "res": {"Yes": 0.6925775580553636, "yes": 0.30257424306645814}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9703976597398907, "res": {"Yes": 0.9703976597398907, "yes": 0.019509385919676596}, "ground_truth": 1}, {"key": "40172531", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8022045690603811, "res": {"Yes": 0.8022045690603811, "yes": 0.1910663364457117}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6360498743037587, "res": {"Yes": 0.6360498743037587, "yes": 0.3499524118100245}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9108116945581345, "res": {"Yes": 0.9108116945581345, "yes": 0.08020690263862584}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8927732676068462, "res": {"Yes": 0.8927732676068462, "yes": 0.09920951023321846}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7470021243584625, "res": {"Yes": 0.7470021243584625, "yes": 0.21180081454514751}, "ground_truth": 1}, {"key": "37035874", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8950639909455288, "res": {"Yes": 0.8950639909455288, "yes": 0.09487914074525146}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8822968632120991, "res": {"Yes": 0.8822968632120991, "yes": 0.11050318885770026}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7356252066868167, "res": {"Yes": 0.7356252066868167, "yes": 0.25587902622497616}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8416990512065041, "res": {"Yes": 0.8416990512065041, "yes": 0.15537382368383018}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7821507203404257, "res": {"Yes": 0.7821507203404257, "yes": 0.21008140090795183}, "ground_truth": 1}, {"key": "36404465", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8450955340231304, "res": {"Yes": 0.8450955340231304, "yes": 0.1459559813990529}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8240230633186352, "res": {"Yes": 0.8240230633186352, "yes": 0.1685809549094145}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9860833918924643, "res": {"Yes": 0.9860833918924643, "yes": 0.012016117564642784}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9916504983384029, "res": {"Yes": 0.9916504983384029, "yes": 0.005493407730978397}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8186032375070529, "res": {"Yes": 0.8186032375070529, "yes": 0.1773138238933093}, "ground_truth": 1}, {"key": "39602052", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9901939737154151, "res": {"Yes": 0.9901939737154151, "yes": 0.007936594500086977}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9823982312780802, "res": {"Yes": 0.9823982312780802, "yes": 0.013554346610955196}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9621375963457769, "res": {"Yes": 0.9621375963457769, "yes": 0.034232845536310695}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.971690075239759, "res": {"Yes": 0.971690075239759, "yes": 0.02665971111997889}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9135803761562055, "res": {"Yes": 0.9135803761562055, "yes": 0.08391071181995355}, "ground_truth": 1}, {"key": "33792789", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9669764379094545, "res": {"Yes": 0.9669764379094545, "yes": 0.03090154179638742}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9633002593382729, "res": {"Yes": 0.9633002593382729, "yes": 0.03393708193628593}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7984068934032892, "res": {"Yes": 0.7984068934032892, "yes": 0.1971803671170147}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6241361782795632, "res": {"Yes": 0.6241361782795632, "yes": 0.3618125568562749}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7161786156026873, "res": {"Yes": 0.7161786156026873, "yes": 0.2707448810054981}, "ground_truth": 1}, {"key": "32776626", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7435970428089532, "res": {"Yes": 0.7435970428089532, "yes": 0.24761959538667327}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6109839796742699, "res": {"Yes": 0.6109839796742699, "yes": 0.3826361849490258}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9886126068156139, "res": {"Yes": 0.9886126068156139, "yes": 0.006887796888663701}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9803929904868051, "res": {"Yes": 0.9803929904868051, "yes": 0.016482573627109864}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8947333751053389, "res": {"Yes": 0.8947333751053389, "yes": 0.10157340905423973}, "ground_truth": 1}, {"key": "37195090", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7831309308879275, "res": {"Yes": 0.7831309308879275, "yes": 0.2109441441961447}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8468995469548481, "res": {"Yes": 0.8468995469548481, "yes": 0.1441759139975214}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8058895021342566, "res": {"Yes": 0.8058895021342566, "yes": 0.18620501148015306}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8367068824552512, "res": {"Yes": 0.8367068824552512, "yes": 0.1601032274851441}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8710325517331714, "res": {"Yes": 0.8710325517331714, "yes": 0.12300392666667186}, "ground_truth": 1}, {"key": "33981824", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8706691979194302, "res": {"Yes": 0.8706691979194302, "yes": 0.1260670463076621}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.820461994050113, "res": {"Yes": 0.820461994050113, "yes": 0.17354535528698628}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9209286554048819, "res": {"Yes": 0.9209286554048819, "yes": 0.07211324127697966}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8870888522885033, "res": {"Yes": 0.8870888522885033, "yes": 0.10795437444083156}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9437505959017775, "res": {"Yes": 0.9437505959017775, "yes": 0.050147631199341854}, "ground_truth": 1}, {"key": "39569142", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8450979087449175, "res": {"Yes": 0.8450979087449175, "yes": 0.1461961596200767}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8993911775098612, "res": {"Yes": 0.8993911775098612, "yes": 0.09153392595101069}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7681522448490404, "res": {"Yes": 0.7681522448490404, "yes": 0.22255994453961375}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.44766768337567897, "res": {"yes": 0.5461846683497924, "Yes": 0.44766768337567897}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6389697597879115, "res": {"Yes": 0.6389697597879115, "yes": 0.35778544066553697}, "ground_truth": 1}, {"key": "40268210", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6370147934839302, "res": {"Yes": 0.6370147934839302, "yes": 0.3554604811688582}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6928782712383895, "res": {"Yes": 0.6928782712383895, "yes": 0.29912052295592506}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9741828129349756, "res": {"Yes": 0.9741828129349756, " Yes": 0.010806685592860198}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8357716409248526, "res": {"Yes": 0.8357716409248526, "yes": 0.16121346864686653}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9645511262557662, "res": {"Yes": 0.9645511262557662, "yes": 0.027545061922812263}, "ground_truth": 1}, {"key": "34925159", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8915372134834479, "res": {"Yes": 0.8915372134834479, "yes": 0.10551881507752418}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7792715779920572, "res": {"Yes": 0.7792715779920572, "yes": 0.21561764541467685}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.828885741703675, "res": {"Yes": 0.828885741703675, "yes": 0.16300633726577593}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9332393107906481, "res": {"Yes": 0.9332393107906481, "yes": 0.06422919634079757}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8556168056667833, "res": {"Yes": 0.8556168056667833, "yes": 0.13939667962295124}, "ground_truth": 1}, {"key": "36181903", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8596125194893347, "res": {"Yes": 0.8596125194893347, "yes": 0.13485731303239318}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9107832139217428, "res": {"Yes": 0.9107832139217428, "yes": 0.08666830785721492}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9645715267278208, "res": {"Yes": 0.9645715267278208, "yes": 0.019713724710478584}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9364184035921888, "res": {"Yes": 0.9364184035921888, "yes": 0.046128163205123904}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8481792294832206, "res": {"Yes": 0.8481792294832206, "yes": 0.1488533479004539}, "ground_truth": 1}, {"key": "38620559", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.698900914510587, "res": {"Yes": 0.698900914510587, "yes": 0.2953910521401079}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8771695199685243, "res": {"Yes": 0.8771695199685243, "yes": 0.12028474163471739}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7525080836686462, "res": {"Yes": 0.7525080836686462, "yes": 0.24388855495866651}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7600239979122113, "res": {"Yes": 0.7600239979122113, "yes": 0.23364941817383617}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7474288366913292, "res": {"Yes": 0.7474288366913292, "yes": 0.24751913174031784}, "ground_truth": 1}, {"key": "32719657", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.728676043875499, "res": {"Yes": 0.728676043875499, "yes": 0.26618487308803607}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7412220025305413, "res": {"Yes": 0.7412220025305413, "yes": 0.2532214694120768}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8585000846863992, "res": {"Yes": 0.8585000846863992, "yes": 0.1296875737347724}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.706021322573145, "res": {"Yes": 0.706021322573145, "yes": 0.26982002940268257}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7192765958756414, "res": {"Yes": 0.7192765958756414, "yes": 0.2656343606049101}, "ground_truth": 1}, {"key": "37530914", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7214579888324154, "res": {"Yes": 0.7214579888324154, "yes": 0.2639294358528278}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6283026511634574, "res": {"Yes": 0.6283026511634574, "yes": 0.3595833424137508}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.740865679017009, "res": {"Yes": 0.740865679017009, "yes": 0.25576932881933545}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8116057388686253, "res": {"Yes": 0.8116057388686253, "yes": 0.18521169700318332}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7281449962093837, "res": {"Yes": 0.7281449962093837, "yes": 0.26786755992566536}, "ground_truth": 1}, {"key": "33306933", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8890262200799173, "res": {"Yes": 0.8890262200799173, "yes": 0.10846267222367627}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8588614347183645, "res": {"Yes": 0.8588614347183645, "yes": 0.13764762519920395}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7022284152719204, "res": {"Yes": 0.7022284152719204, "yes": 0.29362034197084613}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8343148081839439, "res": {"Yes": 0.8343148081839439, "yes": 0.15976429598412145}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8302424603058997, "res": {"Yes": 0.8302424603058997, "yes": 0.16267181706719655}, "ground_truth": 1}, {"key": "33837212", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8649605932711246, "res": {"Yes": 0.8649605932711246, "yes": 0.12776909365124733}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7970704102429432, "res": {"Yes": 0.7970704102429432, "yes": 0.19514039599845906}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9710828887684495, "res": {"Yes": 0.9710828887684495, "yes": 0.026369796387914837}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8618690017454184, "res": {"Yes": 0.8618690017454184, "yes": 0.13396794822406552}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9882233816523662, "res": {"Yes": 0.9882233816523662, "yes": 0.008826100335727895}, "ground_truth": 1}, {"key": "40945179", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9903004678721782, "res": {"Yes": 0.9903004678721782, "yes": 0.007371478369650574}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8973923195813507, "res": {"Yes": 0.8973923195813507, "yes": 0.09760114051762138}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5456846686592458, "res": {"Yes": 0.5456846686592458, "yes": 0.445863984929324}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7640628490840726, "res": {"Yes": 0.7640628490840726, "yes": 0.22975855093634406}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3989536756341818, "res": {"yes": 0.5966878301018683, "Yes": 0.3989536756341818}, "ground_truth": 1}, {"key": "34152358", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6352660699839374, "res": {"Yes": 0.6352660699839374, "yes": 0.359773180595766}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6094331964033002, "res": {"Yes": 0.6094331964033002, "yes": 0.3864704767616166}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7455376298961822, "res": {"Yes": 0.7455376298961822, "yes": 0.25004386912852333}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8146178918817559, "res": {"Yes": 0.8146178918817559, "yes": 0.18040205140385554}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8065705245860484, "res": {"Yes": 0.8065705245860484, "yes": 0.18735960263472962}, "ground_truth": 1}, {"key": "34136541", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.854512156567909, "res": {"Yes": 0.854512156567909, "yes": 0.14097158975574972}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7144088224800154, "res": {"Yes": 0.7144088224800154, "yes": 0.27777817929530807}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7430426451622149, "res": {"Yes": 0.7430426451622149, "yes": 0.2521259032918124}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8454763522397889, "res": {"Yes": 0.8454763522397889, "yes": 0.15079039787568735}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6057360795921438, "res": {"Yes": 0.6057360795921438, "yes": 0.3889713066147673}, "ground_truth": 1}, {"key": "37469603", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8342340921309541, "res": {"Yes": 0.8342340921309541, "yes": 0.1633132080757545}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9826850214211929, "res": {"Yes": 0.9826850214211929, "yes": 0.01472673101730813}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7755810256540382, "res": {"Yes": 0.7755810256540382, "yes": 0.19007808833853043}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7448098113293818, "res": {"Yes": 0.7448098113293818, "yes": 0.21827771607498977}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9894832419100601, "res": {"Yes": 0.9894832419100601, "yes": 0.008053169003390136}, "ground_truth": 1}, {"key": "37353611", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6883204218733652, "res": {"Yes": 0.6883204218733652, "yes": 0.23349612436955292}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9499626851010352, "res": {"Yes": 0.9499626851010352, "yes": 0.03469276205838993}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9781721616818931, "res": {"Yes": 0.9781721616818931, "yes": 0.01819670946266724}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5467428482425354, "res": {"Yes": 0.5467428482425354, "yes": 0.43291071637049494}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.953377300017802, "res": {"Yes": 0.953377300017802, "yes": 0.04196260560614887}, "ground_truth": 1}, {"key": "37211649", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7638776321398459, "res": {"Yes": 0.7638776321398459, "yes": 0.2131458086107815}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5795095712182905, "res": {"Yes": 0.5795095712182905, "yes": 0.28495463124482157}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6482820597134432, "res": {"Yes": 0.6482820597134432, "yes": 0.3407454027701845}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6639741639344372, "res": {"Yes": 0.6639741639344372, "yes": 0.3271932840446852}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.65433855059838, "res": {"Yes": 0.65433855059838, "yes": 0.340276995638422}, "ground_truth": 1}, {"key": "37320976", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9617045414117241, "res": {"Yes": 0.9617045414117241, "yes": 0.031268236656632475}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9689623425732816, "res": {"Yes": 0.9689623425732816, "yes": 0.024051483508203048}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7796552220751749, "res": {"Yes": 0.7796552220751749, "yes": 0.2164250242136952}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.756551841508913, "res": {"Yes": 0.756551841508913, "yes": 0.24041826540144898}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8250024524377964, "res": {"Yes": 0.8250024524377964, "yes": 0.17118812174708303}, "ground_truth": 1}, {"key": "34492412", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9053629861759496, "res": {"Yes": 0.9053629861759496, "yes": 0.09333823014667965}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9321951759180296, "res": {"Yes": 0.9321951759180296, "yes": 0.06612110461188501}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8161489205461091, "res": {"Yes": 0.8161489205461091, "yes": 0.17514956726419756}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7132818037337837, "res": {"Yes": 0.7132818037337837, "yes": 0.2739201879170623}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.781288014933861, "res": {"Yes": 0.781288014933861, "yes": 0.1967032561175374}, "ground_truth": 1}, {"key": "36655016", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9918451407483373, "res": {"Yes": 0.9918451407483373, "yes": 0.0038077245470608642}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7593295546290774, "res": {"Yes": 0.7593295546290774, "yes": 0.22915918177233446}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9564732089076096, "res": {"Yes": 0.9564732089076096, "yes": 0.03961020467919545}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9342067084340661, "res": {"Yes": 0.9342067084340661, "yes": 0.06129701697183382}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8948367362009277, "res": {"Yes": 0.8948367362009277, "yes": 0.10235132716824431}, "ground_truth": 1}, {"key": "35220773", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7178245350161271, "res": {"Yes": 0.7178245350161271, "yes": 0.2775672591079441}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7898217356974319, "res": {"Yes": 0.7898217356974319, "yes": 0.20636061136421535}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7666660653955402, "res": {"Yes": 0.7666660653955402, "yes": 0.18917807722154115}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8046764321547724, "res": {"Yes": 0.8046764321547724, "yes": 0.19050594072375868}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6558594933144627, "res": {"Yes": 0.6558594933144627, "yes": 0.30907481860058117}, "ground_truth": 1}, {"key": "31569808", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8874419892448994, "res": {"Yes": 0.8874419892448994, "yes": 0.10839056852378494}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8198662842571107, "res": {"Yes": 0.8198662842571107, "yes": 0.174721168256186}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8226631560469645, "res": {"Yes": 0.8226631560469645, "yes": 0.16978103526596205}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7534589059205217, "res": {"Yes": 0.7534589059205217, "yes": 0.23665151992024203}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.719660180437701, "res": {"Yes": 0.719660180437701, "yes": 0.2719957058845264}, "ground_truth": 1}, {"key": "37696256", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8107886686661813, "res": {"Yes": 0.8107886686661813, "yes": 0.18322347432296618}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7223333088562652, "res": {"Yes": 0.7223333088562652, "yes": 0.25759687913100443}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5146215771849634, "res": {"Yes": 0.5146215771849634, "yes": 0.23195601866667315}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8489095121800136, "res": {"Yes": 0.8489095121800136, "yes": 0.12219115698027384}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8444060484655938, "res": {"Yes": 0.8444060484655938, "yes": 0.10781362899215438}, "ground_truth": 1}, {"key": "36874328", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7917111130163152, "res": {"Yes": 0.7917111130163152, "yes": 0.15962339811201115}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.4715462926848158, "res": {"Yes": 0.4715462926848158, "yes": 0.40769213519913966}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8863880983774557, "res": {"Yes": 0.8863880983774557, "yes": 0.1107252468254417}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9146316253643975, "res": {"Yes": 0.9146316253643975, "yes": 0.07844719152482511}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9168680648615628, "res": {"Yes": 0.9168680648615628, "yes": 0.07777191520901106}, "ground_truth": 1}, {"key": "24532377", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9485136581256397, "res": {"Yes": 0.9485136581256397, "yes": 0.04776110374147209}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9373635110030354, "res": {"Yes": 0.9373635110030354, "yes": 0.057902952651228266}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8771054977017829, "res": {"Yes": 0.8771054977017829, "yes": 0.11550886485202334}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6618117162764522, "res": {"Yes": 0.6618117162764522, "yes": 0.3288187802113952}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6284325598817604, "res": {"Yes": 0.6284325598817604, "yes": 0.3622138484503248}, "ground_truth": 1}, {"key": "39560618", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.48241499012266054, "res": {"Yes": 0.48241499012266054, "yes": 0.3952216429730621}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.620400867121125, "res": {"Yes": 0.620400867121125, "yes": 0.31723803114075844}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7458491805963622, "res": {"Yes": 0.7458491805963622, "yes": 0.2447468911003742}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8683552186159714, "res": {"Yes": 0.8683552186159714, "yes": 0.12234186266119829}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7285621315675567, "res": {"Yes": 0.7285621315675567, "yes": 0.25986528938135955}, "ground_truth": 1}, {"key": "34922693", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9631839025847605, "res": {"Yes": 0.9631839025847605, "yes": 0.02441238728236085}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.716017894510997, "res": {"Yes": 0.716017894510997, "yes": 0.27822676398621576}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9524978587645769, "res": {"Yes": 0.9524978587645769, "yes": 0.040214172328566146}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9552939412259217, "res": {"Yes": 0.9552939412259217, "yes": 0.04024242868568061}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9259255288298336, "res": {"Yes": 0.9259255288298336, "yes": 0.06844050850822449}, "ground_truth": 1}, {"key": "33629577", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9341175520004258, "res": {"Yes": 0.9341175520004258, "yes": 0.057008933235362956}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.921714395206899, "res": {"Yes": 0.921714395206899, "yes": 0.07371707677838636}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4940266874871885, "res": {"Yes": 0.4940266874871885, "yes": 0.3563584046260689}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4299673944628972, "res": {"Yes": 0.4299673944628972, "yes": 0.40066171948761686}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5018906056849551, "res": {"Yes": 0.5018906056849551, "yes": 0.3426244601464918}, "ground_truth": 1}, {"key": "32284359", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3507530375080004, "res": {"yes": 0.48440124360326636, "Yes": 0.3507530375080004}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.2514188765310967, "res": {"yes": 0.6361113006060415, "Yes": 0.2514188765310967}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.895862449477655, "res": {"Yes": 0.895862449477655, "yes": 0.09674732461623106}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9305784601830065, "res": {"Yes": 0.9305784601830065, "yes": 0.06425210455650479}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9405753993719111, "res": {"Yes": 0.9405753993719111, "yes": 0.054724607928148414}, "ground_truth": 1}, {"key": "28082962", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8984844046538797, "res": {"Yes": 0.8984844046538797, "yes": 0.09267301028404423}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9035762350797751, "res": {"Yes": 0.9035762350797751, "yes": 0.0874966650551798}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9836697570642758, "res": {"Yes": 0.9836697570642758, "yes": 0.012458060946556064}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9580567928921563, "res": {"Yes": 0.9580567928921563, "yes": 0.037379494511985314}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9943952495493831, "res": {"Yes": 0.9943952495493831, "yes": 0.0031437969180404873}, "ground_truth": 1}, {"key": "24796803", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9540217719917276, "res": {"Yes": 0.9540217719917276, "yes": 0.040389517318425446}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.992002084885877, "res": {"Yes": 0.992002084885877, "yes": 0.0038340112503477035}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.638266609306852, "res": {"Yes": 0.638266609306852, "yes": 0.34693053721873135}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9217049753339157, "res": {"Yes": 0.9217049753339157, "yes": 0.07620493916133}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9637732807269909, "res": {"Yes": 0.9637732807269909, "yes": 0.03462152086348858}, "ground_truth": 1}, {"key": "35466150", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9881657610151227, "res": {"Yes": 0.9881657610151227, "yes": 0.005796166663615527}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9929489888516909, "res": {"Yes": 0.9929489888516909, "yes": 0.0049281397243373}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9148559534712148, "res": {"Yes": 0.9148559534712148, "yes": 0.07883780801811249}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9235810363793454, "res": {"Yes": 0.9235810363793454, "yes": 0.07085074483901514}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9741202451305904, "res": {"Yes": 0.9741202451305904, "yes": 0.01945969257771275}, "ground_truth": 1}, {"key": "35754289", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9599061305290554, "res": {"Yes": 0.9599061305290554, "yes": 0.03434224671692306}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9561421218088624, "res": {"Yes": 0.9561421218088624, "yes": 0.03703803208760087}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9289161245329572, "res": {"Yes": 0.9289161245329572, "yes": 0.07055368990969935}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7806873651409372, "res": {"Yes": 0.7806873651409372, "yes": 0.19643507684530392}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.932996523774238, "res": {"Yes": 0.932996523774238, "yes": 0.05872956595446787}, "ground_truth": 1}, {"key": "36678662", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9287284374152794, "res": {"Yes": 0.9287284374152794, "yes": 0.07065531094962474}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7401723311232142, "res": {"Yes": 0.7401723311232142, "yes": 0.14420587686548422}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8313181046185517, "res": {"Yes": 0.8313181046185517, "yes": 0.15227799714557405}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8218500923180891, "res": {"Yes": 0.8218500923180891, "yes": 0.16561179727252626}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8783038189085794, "res": {"Yes": 0.8783038189085794, "yes": 0.11259187769790084}, "ground_truth": 1}, {"key": "35399671", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7013627365029178, "res": {"Yes": 0.7013627365029178, "yes": 0.28538169900909655}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7875759015549217, "res": {"Yes": 0.7875759015549217, "yes": 0.19957601253640705}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9068508732232479, "res": {"Yes": 0.9068508732232479, "yes": 0.09023651601206738}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8966614705471335, "res": {"Yes": 0.8966614705471335, "yes": 0.10070629185779563}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9060955544615155, "res": {"Yes": 0.9060955544615155, "yes": 0.08899426940775032}, "ground_truth": 1}, {"key": "36888180", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.907112700723693, "res": {"Yes": 0.907112700723693, "yes": 0.08626181904956783}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8557714723819909, "res": {"Yes": 0.8557714723819909, "yes": 0.1383006140964334}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7972795012608886, "res": {"Yes": 0.7972795012608886, "yes": 0.1907813063257034}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8092491876556603, "res": {"Yes": 0.8092491876556603, "yes": 0.17888767215159956}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8366633329954039, "res": {"Yes": 0.8366633329954039, "yes": 0.14766591321494363}, "ground_truth": 1}, {"key": "28061069", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7891499227715839, "res": {"Yes": 0.7891499227715839, "yes": 0.19974424615143876}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9708567693156841, "res": {"Yes": 0.9708567693156841, "yes": 0.018884600073147257}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9346442998896678, "res": {"Yes": 0.9346442998896678, "yes": 0.05917772942591511}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9165253669652769, "res": {"Yes": 0.9165253669652769, "yes": 0.07819613947318928}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8883670338305792, "res": {"Yes": 0.8883670338305792, "yes": 0.10748293373187956}, "ground_truth": 1}, {"key": "22259982", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8552157865145205, "res": {"Yes": 0.8552157865145205, "yes": 0.13922124786809034}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.921864001593471, "res": {"Yes": 0.921864001593471, "yes": 0.07163668327298867}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6321635099637589, "res": {"Yes": 0.6321635099637589, "yes": 0.3642142083341426}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9112416809262329, "res": {"Yes": 0.9112416809262329, "yes": 0.08533332677723199}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8086150172770906, "res": {"Yes": 0.8086150172770906, "yes": 0.1849522121453739}, "ground_truth": 1}, {"key": "34026805", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7773714392871202, "res": {"Yes": 0.7773714392871202, "yes": 0.2145319930473891}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5520283363804683, "res": {"Yes": 0.5520283363804683, "yes": 0.44294878283004047}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8267366209124106, "res": {"Yes": 0.8267366209124106, "yes": 0.16453596507476617}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9397900591427295, "res": {"Yes": 0.9397900591427295, "yes": 0.041902600061733235}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9064546840983331, "res": {"Yes": 0.9064546840983331, "yes": 0.08817838623720256}, "ground_truth": 1}, {"key": "36713809", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8957593273115405, "res": {"Yes": 0.8957593273115405, "yes": 0.09877922199890865}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6854584873631466, "res": {"Yes": 0.6854584873631466, "yes": 0.3085607914827679}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8807427089709943, "res": {"Yes": 0.8807427089709943, "yes": 0.1135730882463523}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.851398914966387, "res": {"Yes": 0.851398914966387, "yes": 0.14432174955402488}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7469201154971881, "res": {"Yes": 0.7469201154971881, "yes": 0.24801535981428777}, "ground_truth": 1}, {"key": "39726411", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6169980163955429, "res": {"Yes": 0.6169980163955429, "yes": 0.3747149782508432}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7879995816700263, "res": {"Yes": 0.7879995816700263, "yes": 0.20479960797565264}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7829909430845953, "res": {"Yes": 0.7829909430845953, "yes": 0.16741977461101468}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7393660567585513, "res": {"Yes": 0.7393660567585513, "yes": 0.19387006739523777}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8703908633551057, "res": {"Yes": 0.8703908633551057, "yes": 0.10376208964097845}, "ground_truth": 1}, {"key": "37069841", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929593287812328, "res": {"Yes": 0.9929593287812328, "yes": 0.0048529816001719895}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8440774020509354, "res": {"Yes": 0.8440774020509354, "yes": 0.09306406984154962}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4669453554705609, "res": {"yes": 0.4766707061460161, "Yes": 0.4669453554705609}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5481573216125762, "res": {"Yes": 0.5481573216125762, "yes": 0.438627476079659}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6082357958973332, "res": {"Yes": 0.6082357958973332, "yes": 0.33381187272756135}, "ground_truth": 1}, {"key": "38894693", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6369005614943554, "res": {"Yes": 0.6369005614943554, "yes": 0.3193275182238983}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6543900098053486, "res": {"Yes": 0.6543900098053486, "yes": 0.28372727542544196}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9912465822086821, "res": {"Yes": 0.9912465822086821, "yes": 0.006809138990003785}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6987669410587017, "res": {"Yes": 0.6987669410587017, "yes": 0.23448902356886517}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9932330323433692, "res": {"Yes": 0.9932330323433692, "yes": 0.0042596712203536394}, "ground_truth": 1}, {"key": "33946032", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8494206246192528, "res": {"Yes": 0.8494206246192528, "yes": 0.14646198093604218}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.99011624653931, "res": {"Yes": 0.99011624653931, "yes": 0.006651725395485684}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8519640737516754, "res": {"Yes": 0.8519640737516754, "yes": 0.13999216811127077}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9084354453246781, "res": {"Yes": 0.9084354453246781, "yes": 0.08564597062304184}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9054848526136204, "res": {"Yes": 0.9054848526136204, "yes": 0.088032483670262}, "ground_truth": 1}, {"key": "39035311", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7677262152133226, "res": {"Yes": 0.7677262152133226, "yes": 0.2258256584945174}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8654643459130645, "res": {"Yes": 0.8654643459130645, "yes": 0.1297282631084476}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8802891024070514, "res": {"Yes": 0.8802891024070514, "yes": 0.10862871453846332}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8961007339886089, "res": {"Yes": 0.8961007339886089, "yes": 0.08982993565847903}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8609743206742944, "res": {"Yes": 0.8609743206742944, "yes": 0.1226595067586739}, "ground_truth": 1}, {"key": "27680038", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9475625522489167, "res": {"Yes": 0.9475625522489167, "yes": 0.045403517452895205}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6597817766994787, "res": {"Yes": 0.6597817766994787, "yes": 0.3349202617889492}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8560257684192274, "res": {"Yes": 0.8560257684192274, "yes": 0.13780908413088686}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9808644905049544, "res": {"Yes": 0.9808644905049544, "yes": 0.011871941738980993}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8738072459857871, "res": {"Yes": 0.8738072459857871, "yes": 0.12123602664143712}, "ground_truth": 1}, {"key": "36901907", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8421559568324468, "res": {"Yes": 0.8421559568324468, "yes": 0.1513239463586219}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9193247185740716, "res": {"Yes": 0.9193247185740716, "yes": 0.07276795494600265}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9163123307421186, "res": {"Yes": 0.9163123307421186, "yes": 0.0783951433978916}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.880039294205965, "res": {"Yes": 0.880039294205965, "yes": 0.11479840372819584}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.906987264781778, "res": {"Yes": 0.906987264781778, "yes": 0.08738242530565164}, "ground_truth": 1}, {"key": "21530542", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9006957993183262, "res": {"Yes": 0.9006957993183262, "yes": 0.09257464378897662}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8808832779163196, "res": {"Yes": 0.8808832779163196, "yes": 0.11104743795521589}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9096112213912859, "res": {"Yes": 0.9096112213912859, "yes": 0.0805970363497339}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9772518531697986, "res": {"Yes": 0.9772518531697986, "yes": 0.016512285489713452}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7577996820253957, "res": {"Yes": 0.7577996820253957, "yes": 0.22549630634913664}, "ground_truth": 1}, {"key": "38192532", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8368974641493969, "res": {"Yes": 0.8368974641493969, "yes": 0.14724408898097285}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9447381876308076, "res": {"Yes": 0.9447381876308076, "yes": 0.048885521998014755}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9426432309767693, "res": {"Yes": 0.9426432309767693, "yes": 0.04862513989644272}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9285171942980393, "res": {"Yes": 0.9285171942980393, "yes": 0.06346352167251987}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9334525103293408, "res": {"Yes": 0.9334525103293408, "yes": 0.051197457826271636}, "ground_truth": 1}, {"key": "34102400", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9608763580179357, "res": {"Yes": 0.9608763580179357, "yes": 0.032867278190924804}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9413499988811267, "res": {"Yes": 0.9413499988811267, "yes": 0.04889094859212694}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7192812999599608, "res": {"Yes": 0.7192812999599608, "yes": 0.24457775604282486}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5673008252233271, "res": {"Yes": 0.5673008252233271, "yes": 0.33746843465217147}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8290132175544012, "res": {"Yes": 0.8290132175544012, "yes": 0.1642743636324428}, "ground_truth": 1}, {"key": "36133399", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8563552305193579, "res": {"Yes": 0.8563552305193579, "yes": 0.1398002724959689}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.4780681898859539, "res": {"Yes": 0.4780681898859539, "yes": 0.4650515868898746}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9690188599190739, "res": {"Yes": 0.9690188599190739, "yes": 0.028516215174755968}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.987906947857667, "res": {"Yes": 0.987906947857667, "yes": 0.009712884266039671}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8592500027944475, "res": {"Yes": 0.8592500027944475, "yes": 0.13750215352474685}, "ground_truth": 1}, {"key": "34314544", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.98755211590445, "res": {"Yes": 0.98755211590445, "yes": 0.011265868622415352}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7615817717067372, "res": {"Yes": 0.7615817717067372, "yes": 0.23385294531492284}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.985597374565057, "res": {"Yes": 0.985597374565057, " Yes": 0.006660622702011859}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9055430137735294, "res": {"Yes": 0.9055430137735294, "yes": 0.09121183687753845}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9305443830237512, "res": {"Yes": 0.9305443830237512, "yes": 0.06824418841303648}, "ground_truth": 1}, {"key": "33460074", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8216040097199595, "res": {"Yes": 0.8216040097199595, "yes": 0.1667198033400607}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9792650878481275, "res": {"Yes": 0.9792650878481275, "yes": 0.018989378166640616}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9068018592581645, "res": {"Yes": 0.9068018592581645, "yes": 0.08811513212552842}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8431531541170197, "res": {"Yes": 0.8431531541170197, "yes": 0.1441172463402025}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9147479190218775, "res": {"Yes": 0.9147479190218775, "yes": 0.07623397115144807}, "ground_truth": 1}, {"key": "36191495", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7685245278400926, "res": {"Yes": 0.7685245278400926, "yes": 0.21750059343942374}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6390584868987952, "res": {"Yes": 0.6390584868987952, "yes": 0.3467023666832228}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9903692060228483, "res": {"Yes": 0.9903692060228483, "yes": 0.006782968099749424}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.876944405759102, "res": {"Yes": 0.876944405759102, "yes": 0.1202814338498047}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9881339811271218, "res": {"Yes": 0.9881339811271218, "yes": 0.00983606053710918}, "ground_truth": 1}, {"key": "39532668", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.985585565208065, "res": {"Yes": 0.985585565208065, "yes": 0.012418766785990467}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.984729562839698, "res": {"Yes": 0.984729562839698, "yes": 0.013214652858745122}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.502657527808469, "res": {"Yes": 0.502657527808469, "yes": 0.3721537736857122}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.396733423962906, "res": {"yes": 0.5516272679515091, "Yes": 0.396733423962906}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6920138286263823, "res": {"Yes": 0.6920138286263823, "yes": 0.2506014125159352}, "ground_truth": 1}, {"key": "20328247", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4719656365341453, "res": {"Yes": 0.4719656365341453, "yes": 0.4638856041187905}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7250093990533353, "res": {"Yes": 0.7250093990533353, "yes": 0.214050523650673}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7916222800963878, "res": {"Yes": 0.7916222800963878, "yes": 0.19772766981499398}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6364869906996953, "res": {"Yes": 0.6364869906996953, "yes": 0.3452697097432436}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7063659521232677, "res": {"Yes": 0.7063659521232677, "yes": 0.28487907129630746}, "ground_truth": 1}, {"key": "39112675", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7825639927808453, "res": {"Yes": 0.7825639927808453, "yes": 0.21123234293031365}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5394175676144201, "res": {"Yes": 0.5394175676144201, "yes": 0.4557560184114362}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4095347048301724, "res": {"yes": 0.5756081355700067, "Yes": 0.4095347048301724}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5938230913432109, "res": {"Yes": 0.5938230913432109, "yes": 0.39595190386114426}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.44017228583193196, "res": {"yes": 0.5546254266637832, "Yes": 0.44017228583193196}, "ground_truth": 1}, {"key": "31620300", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7428661489164843, "res": {"Yes": 0.7428661489164843, "yes": 0.25121910307662076}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6960845125094048, "res": {"Yes": 0.6960845125094048, "yes": 0.27964688407723043}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8831504922508888, "res": {"Yes": 0.8831504922508888, "yes": 0.11085584670684649}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6006145727945963, "res": {"Yes": 0.6006145727945963, "yes": 0.3938648534792814}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8312540872763344, "res": {"Yes": 0.8312540872763344, "yes": 0.1626432544039948}, "ground_truth": 1}, {"key": "37518509", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7296236176962354, "res": {"Yes": 0.7296236176962354, "yes": 0.2632186824755814}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.649016168269846, "res": {"Yes": 0.649016168269846, "yes": 0.34519286125940835}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9795083077600235, "res": {"Yes": 0.9795083077600235, "yes": 0.013383562555092182}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8346830558666923, "res": {"Yes": 0.8346830558666923, "yes": 0.16344569246471896}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8082369245416477, "res": {"Yes": 0.8082369245416477, "yes": 0.18916668904408}, "ground_truth": 1}, {"key": "35454095", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7425837797429465, "res": {"Yes": 0.7425837797429465, "yes": 0.2138924629157722}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.988934159872137, "res": {"Yes": 0.988934159872137, "yes": 0.007290337046081106}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9433616215063656, "res": {"Yes": 0.9433616215063656, "yes": 0.055397766606700684}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9782529297571098, "res": {"Yes": 0.9782529297571098, "yes": 0.01847836482831068}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7609746904135499, "res": {"Yes": 0.7609746904135499, "yes": 0.23582732865277692}, "ground_truth": 1}, {"key": "38542788", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9049339421913658, "res": {"Yes": 0.9049339421913658, "yes": 0.09327131196774584}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8806178020905617, "res": {"Yes": 0.8806178020905617, "yes": 0.11754030299472427}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7661808044139411, "res": {"Yes": 0.7661808044139411, "yes": 0.22776488231850248}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7374990344421517, "res": {"Yes": 0.7374990344421517, "yes": 0.25633540916613834}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.707480112075838, "res": {"Yes": 0.707480112075838, "yes": 0.2880070557541316}, "ground_truth": 1}, {"key": "23944937", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7762618501168158, "res": {"Yes": 0.7762618501168158, "yes": 0.21178814923513423}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7262622525334865, "res": {"Yes": 0.7262622525334865, "yes": 0.26836875304964525}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7863651790962533, "res": {"Yes": 0.7863651790962533, "yes": 0.20675949936316887}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7830016153238807, "res": {"Yes": 0.7830016153238807, "yes": 0.21370214057032905}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8100136496806858, "res": {"Yes": 0.8100136496806858, "yes": 0.18433907837910338}, "ground_truth": 1}, {"key": "31753944", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8320585400492534, "res": {"Yes": 0.8320585400492534, "yes": 0.16530528541234554}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7737872608154923, "res": {"Yes": 0.7737872608154923, "yes": 0.2202363700667823}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8674464137173795, "res": {"Yes": 0.8674464137173795, "yes": 0.12036154389413635}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8839074238323521, "res": {"Yes": 0.8839074238323521, "yes": 0.10949679495185771}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7507561953036742, "res": {"Yes": 0.7507561953036742, "yes": 0.2365229111130542}, "ground_truth": 1}, {"key": "35527214", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9219319915706881, "res": {"Yes": 0.9219319915706881, "yes": 0.0647565192547298}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8197352964871037, "res": {"Yes": 0.8197352964871037, "yes": 0.1693018605906517}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9369513116938732, "res": {"Yes": 0.9369513116938732, "yes": 0.06009887238078116}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8954040052900967, "res": {"Yes": 0.8954040052900967, "yes": 0.10278145922098611}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9094950441697939, "res": {"Yes": 0.9094950441697939, "yes": 0.08924784538128133}, "ground_truth": 1}, {"key": "40400404", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9896972289225523, "res": {"Yes": 0.9896972289225523, "yes": 0.00850910935934687}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8252411519222843, "res": {"Yes": 0.8252411519222843, "yes": 0.17348944798066349}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8668948325177299, "res": {"Yes": 0.8668948325177299, "yes": 0.12576172924977283}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9042953620703393, "res": {"Yes": 0.9042953620703393, "yes": 0.08586147819443052}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.858488220297212, "res": {"Yes": 0.858488220297212, "yes": 0.13349781077038517}, "ground_truth": 1}, {"key": "21713119", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9076351484675631, "res": {"Yes": 0.9076351484675631, "yes": 0.08684034162680443}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.958316624380345, "res": {"Yes": 0.958316624380345, "yes": 0.03335237149010714}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8633947642380553, "res": {"Yes": 0.8633947642380553, "yes": 0.12886628018013555}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7122752465789715, "res": {"Yes": 0.7122752465789715, "yes": 0.28320249703144884}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8811391323786083, "res": {"Yes": 0.8811391323786083, "yes": 0.10885613836625066}, "ground_truth": 1}, {"key": "28730678", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8611502959776345, "res": {"Yes": 0.8611502959776345, "yes": 0.13336439302271472}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6228227880599785, "res": {"Yes": 0.6228227880599785, "yes": 0.37110341818651593}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8969698136981706, "res": {"Yes": 0.8969698136981706, "yes": 0.09373417766017031}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9422517967168902, "res": {"Yes": 0.9422517967168902, "yes": 0.0536164797574272}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8830041663377595, "res": {"Yes": 0.8830041663377595, "yes": 0.10894274608808896}, "ground_truth": 1}, {"key": "36823733", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8848620958626691, "res": {"Yes": 0.8848620958626691, "yes": 0.10389027342841933}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8873122715688303, "res": {"Yes": 0.8873122715688303, "yes": 0.09675565491949491}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.940934314279024, "res": {"Yes": 0.940934314279024, "yes": 0.055472077108323053}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7863558921783275, "res": {"Yes": 0.7863558921783275, "yes": 0.211023052060925}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8135398236895198, "res": {"Yes": 0.8135398236895198, "yes": 0.1829367892032187}, "ground_truth": 1}, {"key": "35988862", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8454047772416207, "res": {"Yes": 0.8454047772416207, "yes": 0.15003832762873864}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6571763167914032, "res": {"Yes": 0.6571763167914032, "yes": 0.3396259208046475}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9158487573535012, "res": {"Yes": 0.9158487573535012, "yes": 0.05843641727612863}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924848123063413, "res": {"Yes": 0.9924848123063413, "yes": 0.006418394310250275}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7912523622991944, "res": {"Yes": 0.7912523622991944, "yes": 0.18517239925071824}, "ground_truth": 1}, {"key": "40499665", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8843827773610766, "res": {"Yes": 0.8843827773610766, "yes": 0.10453059241892286}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9097733103587907, "res": {"Yes": 0.9097733103587907, "yes": 0.07933945762535878}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8176334810234446, "res": {"Yes": 0.8176334810234446, "yes": 0.17176637677906792}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9624767143461185, "res": {"Yes": 0.9624767143461185, "yes": 0.029447669680851336}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4880836518958406, "res": {"Yes": 0.4880836518958406, "yes": 0.39239986316658443}, "ground_truth": 1}, {"key": "32829820", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.773275337820288, "res": {"Yes": 0.773275337820288, "yes": 0.21255440496953773}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8577852175736423, "res": {"Yes": 0.8577852175736423, "yes": 0.13537502350935754}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7637815041888193, "res": {"Yes": 0.7637815041888193, "yes": 0.2089277459390921}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.902067588417533, "res": {"Yes": 0.902067588417533, "yes": 0.08807171968612057}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9077355293890929, "res": {"Yes": 0.9077355293890929, "yes": 0.08604587454562634}, "ground_truth": 1}, {"key": "20583553", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9125570362494748, "res": {"Yes": 0.9125570362494748, "yes": 0.07426324747812364}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8904112894716917, "res": {"Yes": 0.8904112894716917, "yes": 0.10052191656342492}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8914636914411751, "res": {"Yes": 0.8914636914411751, "yes": 0.09900618064883054}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9463587533021653, "res": {"Yes": 0.9463587533021653, "yes": 0.04417125238627101}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9072535862655946, "res": {"Yes": 0.9072535862655946, "yes": 0.08423689254113222}, "ground_truth": 1}, {"key": "30501550", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9172113164819324, "res": {"Yes": 0.9172113164819324, "yes": 0.07305386012136059}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.910820693422131, "res": {"Yes": 0.910820693422131, "yes": 0.07872935455723873}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8286488634348672, "res": {"Yes": 0.8286488634348672, "yes": 0.16865572027907882}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6897063304714639, "res": {"Yes": 0.6897063304714639, "yes": 0.3069281186547522}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6152434351396999, "res": {"Yes": 0.6152434351396999, "yes": 0.3786777308507561}, "ground_truth": 1}, {"key": "38755897", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7411766707791256, "res": {"Yes": 0.7411766707791256, "yes": 0.25119576590597326}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6571361054028587, "res": {"Yes": 0.6571361054028587, "yes": 0.33805630991272295}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9766615583609132, "res": {"Yes": 0.9766615583609132, "yes": 0.019493322655058307}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9803243673400266, "res": {"Yes": 0.9803243673400266, "yes": 0.01659717630827093}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9823715467573415, "res": {"Yes": 0.9823715467573415, "yes": 0.015787441449094587}, "ground_truth": 1}, {"key": "35507201", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9874440036294767, "res": {"Yes": 0.9874440036294767, "yes": 0.010626323576757262}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9014810257330794, "res": {"Yes": 0.9014810257330794, "yes": 0.09508065952377026}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7434161476552256, "res": {"Yes": 0.7434161476552256, "yes": 0.2167890539178682}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6956439817451888, "res": {"Yes": 0.6956439817451888, "yes": 0.2984967601432023}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.736601395837088, "res": {"Yes": 0.736601395837088, "yes": 0.2567185064537317}, "ground_truth": 1}, {"key": "36453511", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9765651402548543, "res": {"Yes": 0.9765651402548543, "yes": 0.014288495157047878}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9838372774990685, "res": {"Yes": 0.9838372774990685, "yes": 0.011474030667776386}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8563867792271739, "res": {"Yes": 0.8563867792271739, "yes": 0.13887657360464314}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8438209849694437, "res": {"Yes": 0.8438209849694437, "yes": 0.1520027448614343}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.876376224695486, "res": {"Yes": 0.876376224695486, "yes": 0.11765805504283758}, "ground_truth": 1}, {"key": "38066835", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8754823914699084, "res": {"Yes": 0.8754823914699084, "yes": 0.12127570155981689}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8123689988192647, "res": {"Yes": 0.8123689988192647, "yes": 0.18304111120090338}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7371175879958504, "res": {"Yes": 0.7371175879958504, "yes": 0.2531325283332875}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.841158130441236, "res": {"Yes": 0.841158130441236, "yes": 0.1480716353032179}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7446288817650282, "res": {"Yes": 0.7446288817650282, "yes": 0.24981723212074697}, "ground_truth": 1}, {"key": "39697181", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7926384406259146, "res": {"Yes": 0.7926384406259146, "yes": 0.1997960865093106}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.690454936770208, "res": {"Yes": 0.690454936770208, "yes": 0.3004857942003581}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9695732575084061, "res": {"Yes": 0.9695732575084061, "yes": 0.02777858996898772}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9714891029055656, "res": {"Yes": 0.9714891029055656, "yes": 0.02567065675222476}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9654394332979317, "res": {"Yes": 0.9654394332979317, "yes": 0.030636689040026772}, "ground_truth": 1}, {"key": "21820893", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.43775972157947807, "res": {"yes": 0.4686195049008184, "Yes": 0.43775972157947807}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8903880411364319, "res": {"Yes": 0.8903880411364319, "yes": 0.06345811481080894}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9994125891333553, "res": {"Yes": 0.9994125891333553, " Yes": 0.0003232673073692699}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9478891253015654, "res": {"Yes": 0.9478891253015654, "yes": 0.04399016028320218}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9425427241728517, "res": {"Yes": 0.9425427241728517, "yes": 0.0435191024716824}, "ground_truth": 1}, {"key": "40519933", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.885975341525216, "res": {"Yes": 0.885975341525216, "yes": 0.07037049561791424}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8625881779482291, "res": {"Yes": 0.8625881779482291, "yes": 0.0970097102324305}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7683831707549129, "res": {"Yes": 0.7683831707549129, "yes": 0.22832790577477335}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5181882737092873, "res": {"Yes": 0.5181882737092873, "yes": 0.47737012289289227}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7676515958517636, "res": {"Yes": 0.7676515958517636, "yes": 0.22921065670497912}, "ground_truth": 1}, {"key": "30446033", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.724050078347663, "res": {"Yes": 0.724050078347663, "yes": 0.27121264442954895}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5495706730679467, "res": {"Yes": 0.5495706730679467, "yes": 0.44325314000645305}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8468958629498315, "res": {"Yes": 0.8468958629498315, "yes": 0.14880838609587568}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9050490480594756, "res": {"Yes": 0.9050490480594756, "yes": 0.0917246430749435}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9769458140924212, "res": {"Yes": 0.9769458140924212, "yes": 0.01935931654857252}, "ground_truth": 1}, {"key": "40216291", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9725625955079845, "res": {"Yes": 0.9725625955079845, "yes": 0.021676759134520614}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.935392394003878, "res": {"Yes": 0.935392394003878, "yes": 0.05914159470544881}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7245760889721605, "res": {"Yes": 0.7245760889721605, "yes": 0.2572445474557229}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7024248911518751, "res": {"Yes": 0.7024248911518751, "yes": 0.2845959283498151}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7385067624833659, "res": {"Yes": 0.7385067624833659, "yes": 0.2489228673959715}, "ground_truth": 1}, {"key": "33479118", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.450431052617763, "res": {"yes": 0.48400623021660305, "Yes": 0.450431052617763}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6872569769539681, "res": {"Yes": 0.6872569769539681, "yes": 0.2870922460663148}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8587176733473552, "res": {"Yes": 0.8587176733473552, "yes": 0.1248531540325447}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8907046526230714, "res": {"Yes": 0.8907046526230714, "yes": 0.10124650420114331}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8376036012068, "res": {"Yes": 0.8376036012068, "yes": 0.14100874066406757}, "ground_truth": 1}, {"key": "22297373", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7521233042921909, "res": {"Yes": 0.7521233042921909, "yes": 0.22446084428409122}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8671146355567257, "res": {"Yes": 0.8671146355567257, "yes": 0.12159450495329156}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7795394440754526, "res": {"Yes": 0.7795394440754526, "yes": 0.20185505270849}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6497680863422038, "res": {"Yes": 0.6497680863422038, "yes": 0.32397643413056115}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.614314037414351, "res": {"Yes": 0.614314037414351, "yes": 0.3629679647460359}, "ground_truth": 1}, {"key": "36463668", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6258638334456734, "res": {"Yes": 0.6258638334456734, "yes": 0.34333288234008363}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7128605213032216, "res": {"Yes": 0.7128605213032216, "yes": 0.26095987998698533}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8606796251712621, "res": {"Yes": 0.8606796251712621, "yes": 0.12706204099943724}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7863030429748316, "res": {"Yes": 0.7863030429748316, "yes": 0.20543851230762886}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7333173455970962, "res": {"Yes": 0.7333173455970962, "yes": 0.26181346354374097}, "ground_truth": 1}, {"key": "35264615", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8828158063219903, "res": {"Yes": 0.8828158063219903, "yes": 0.10849952325222348}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7712057729824973, "res": {"Yes": 0.7712057729824973, "yes": 0.21632784955752313}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9859840667429266, "res": {"Yes": 0.9859840667429266, " Yes": 0.011094941447374648}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9893925530007566, "res": {"Yes": 0.9893925530007566, "yes": 0.004791755222117697}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9764820429798757, "res": {"Yes": 0.9764820429798757, "yes": 0.012527334025885503}, "ground_truth": 1}, {"key": "39898482", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8738045109733874, "res": {"Yes": 0.8738045109733874, "yes": 0.12173916680410873}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8935346187765388, "res": {"Yes": 0.8935346187765388, "yes": 0.10219204927320336}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9734534085968413, "res": {"Yes": 0.9734534085968413, "yes": 0.02141468879765672}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9594182430821029, "res": {"Yes": 0.9594182430821029, "yes": 0.032704683010579064}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9722652332801079, "res": {"Yes": 0.9722652332801079, "yes": 0.023301361188723085}, "ground_truth": 1}, {"key": "37228721", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9689432038181061, "res": {"Yes": 0.9689432038181061, "yes": 0.026662209251946494}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7483677031822167, "res": {"Yes": 0.7483677031822167, "yes": 0.24828780540947054}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8216712278935167, "res": {"Yes": 0.8216712278935167, "yes": 0.16627961166975905}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8961648755902171, "res": {"Yes": 0.8961648755902171, "yes": 0.09888796859284145}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8520213532217815, "res": {"Yes": 0.8520213532217815, "yes": 0.1408311674534548}, "ground_truth": 1}, {"key": "24535799", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8498586563261964, "res": {"Yes": 0.8498586563261964, "yes": 0.13943076627804282}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8499423460284797, "res": {"Yes": 0.8499423460284797, "yes": 0.14389659897647206}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8286711378156827, "res": {"Yes": 0.8286711378156827, "yes": 0.1553514669992041}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8483587571322941, "res": {"Yes": 0.8483587571322941, "yes": 0.14608300374763236}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5519167275348867, "res": {"Yes": 0.5519167275348867, "yes": 0.44415943196580576}, "ground_truth": 1}, {"key": "35177759", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7520757114353491, "res": {"Yes": 0.7520757114353491, "yes": 0.24430041548586875}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5564872887205025, "res": {"Yes": 0.5564872887205025, "yes": 0.43786582988328493}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8415908661636637, "res": {"Yes": 0.8415908661636637, "yes": 0.1546320894511874}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9639360834932497, "res": {"Yes": 0.9639360834932497, "yes": 0.02667381250329629}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9721699151247742, "res": {"Yes": 0.9721699151247742, "yes": 0.02321519402041126}, "ground_truth": 1}, {"key": "34364829", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9804562494441126, "res": {"Yes": 0.9804562494441126, "yes": 0.0119568576891673}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9842183048633866, "res": {"Yes": 0.9842183048633866, "yes": 0.011237228945603719}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7422078305152632, "res": {"Yes": 0.7422078305152632, "yes": 0.2539241445678735}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.635669852667658, "res": {"Yes": 0.635669852667658, "yes": 0.35990232636410374}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7050500188240583, "res": {"Yes": 0.7050500188240583, "yes": 0.2918514322084568}, "ground_truth": 1}, {"key": "38090732", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7733535972443355, "res": {"Yes": 0.7733535972443355, "yes": 0.22321786753474074}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.526882065560208, "res": {"Yes": 0.526882065560208, "yes": 0.4697098900834556}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.696619659060993, "res": {"Yes": 0.696619659060993, "yes": 0.2983176561422518}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7254639121028762, "res": {"Yes": 0.7254639121028762, "yes": 0.27089480686727224}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7931980188557829, "res": {"Yes": 0.7931980188557829, "yes": 0.2004568234128098}, "ground_truth": 1}, {"key": "30651479", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.986660862451451, "res": {"Yes": 0.986660862451451, "yes": 0.010181814928579273}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9666871589370291, "res": {"Yes": 0.9666871589370291, "yes": 0.028806123295857126}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9169397245685138, "res": {"Yes": 0.9169397245685138, "yes": 0.07920635844403617}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8391944377169553, "res": {"Yes": 0.8391944377169553, "yes": 0.1556094712304497}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8662843721635014, "res": {"Yes": 0.8662843721635014, "yes": 0.12934986681485386}, "ground_truth": 1}, {"key": "39380921", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8691069054951378, "res": {"Yes": 0.8691069054951378, "yes": 0.12450171351492473}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8683978298520071, "res": {"Yes": 0.8683978298520071, "yes": 0.12527756291924724}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8908170265566409, "res": {"Yes": 0.8908170265566409, "yes": 0.09835945524784304}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8283442252589432, "res": {"Yes": 0.8283442252589432, "yes": 0.15969059774144642}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8401538408874046, "res": {"Yes": 0.8401538408874046, "yes": 0.1511850796000099}, "ground_truth": 1}, {"key": "39037490", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9124284021510622, "res": {"Yes": 0.9124284021510622, "yes": 0.0797509041561707}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9368740819803167, "res": {"Yes": 0.9368740819803167, "yes": 0.06001391661928842}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7909668271162763, "res": {"Yes": 0.7909668271162763, "yes": 0.19174193492200733}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6245162306015182, "res": {"Yes": 0.6245162306015182, "yes": 0.36687899470341584}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7206170644915132, "res": {"Yes": 0.7206170644915132, "yes": 0.2688671735375363}, "ground_truth": 1}, {"key": "35917499", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3996189543597323, "res": {"yes": 0.5923353114311939, "Yes": 0.3996189543597323}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5146888477676904, "res": {"Yes": 0.5146888477676904, "yes": 0.4711084295978067}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7889142231125074, "res": {"Yes": 0.7889142231125074, "yes": 0.20646895848233393}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7229470760404229, "res": {"Yes": 0.7229470760404229, "yes": 0.27403284705946607}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7825056782849285, "res": {"Yes": 0.7825056782849285, "yes": 0.21404024947212003}, "ground_truth": 1}, {"key": "34908073", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9732873788624445, "res": {"Yes": 0.9732873788624445, "yes": 0.020102763214650654}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.772174745729346, "res": {"Yes": 0.772174745729346, "yes": 0.22361874689582406}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9264423487160979, "res": {"Yes": 0.9264423487160979, "yes": 0.07006307999026808}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9195671948603718, "res": {"Yes": 0.9195671948603718, "yes": 0.07576568449395428}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9231542503219808, "res": {"Yes": 0.9231542503219808, "yes": 0.07083711446699195}, "ground_truth": 1}, {"key": "36344759", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8923515051102678, "res": {"Yes": 0.8923515051102678, "yes": 0.10112626443480704}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9319905256125377, "res": {"Yes": 0.9319905256125377, "yes": 0.06237364803094166}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7830199612666496, "res": {"Yes": 0.7830199612666496, "yes": 0.21389453768272512}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4465554045613739, "res": {"yes": 0.5476427065621771, "Yes": 0.4465554045613739}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5834770089515579, "res": {"Yes": 0.5834770089515579, "yes": 0.41463907746877465}, "ground_truth": 1}, {"key": "39984637", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5337696500686787, "res": {"Yes": 0.5337696500686787, "yes": 0.4619245790668414}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7159481864022331, "res": {"Yes": 0.7159481864022331, "yes": 0.2805399892526844}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8363274299145405, "res": {"Yes": 0.8363274299145405, "yes": 0.14385452976175028}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8771772303224918, "res": {"Yes": 0.8771772303224918, "yes": 0.10783398612696117}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.790441910133399, "res": {"Yes": 0.790441910133399, "yes": 0.18096182166369754}, "ground_truth": 1}, {"key": "17917326", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8706365832621525, "res": {"Yes": 0.8706365832621525, "yes": 0.11104086414147575}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8657359712405971, "res": {"Yes": 0.8657359712405971, "yes": 0.1129433515145746}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8750146586692426, "res": {"Yes": 0.8750146586692426, "yes": 0.12130956646301284}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.900645419407134, "res": {"Yes": 0.900645419407134, "yes": 0.09699792426864906}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9861953729499148, "res": {"Yes": 0.9861953729499148, "yes": 0.010875469831979127}, "ground_truth": 1}, {"key": "32193638", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894380710521783, "res": {"Yes": 0.9894380710521783, "yes": 0.00878980897151139}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9829610295752755, "res": {"Yes": 0.9829610295752755, "yes": 0.008709191646018395}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.744205896371766, "res": {"Yes": 0.744205896371766, "yes": 0.24975890660681552}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9724870595046701, "res": {"Yes": 0.9724870595046701, "yes": 0.018856952503752553}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9764711298775459, "res": {"Yes": 0.9764711298775459, "yes": 0.016812737421413967}, "ground_truth": 1}, {"key": "34564692", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6832852116316513, "res": {"Yes": 0.6832852116316513, "yes": 0.30889468049669655}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7580503126612333, "res": {"Yes": 0.7580503126612333, "yes": 0.23471302060773794}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9452868864647284, "res": {"Yes": 0.9452868864647284, "yes": 0.05075760601871598}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9367608863238444, "res": {"Yes": 0.9367608863238444, "yes": 0.05561974582698759}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8440792674640553, "res": {"Yes": 0.8440792674640553, "yes": 0.1539562050200047}, "ground_truth": 1}, {"key": "39329284", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9035160814943094, "res": {"Yes": 0.9035160814943094, "yes": 0.08377721548922493}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9534275815091137, "res": {"Yes": 0.9534275815091137, "yes": 0.04277989199466293}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7658388040223328, "res": {"Yes": 0.7658388040223328, "yes": 0.21804766353990546}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.904003497212089, "res": {"Yes": 0.904003497212089, "yes": 0.09148999106057597}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6809849315657289, "res": {"Yes": 0.6809849315657289, "yes": 0.3088036935268609}, "ground_truth": 1}, {"key": "37438541", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9296087874429554, "res": {"Yes": 0.9296087874429554, "yes": 0.05936180339149548}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7687522907718716, "res": {"Yes": 0.7687522907718716, "yes": 0.22318713255053343}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9612810144263194, "res": {"Yes": 0.9612810144263194, "yes": 0.03186423047642096}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9580031786600763, "res": {"Yes": 0.9580031786600763, "yes": 0.03775431897113568}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9871781286594592, "res": {"Yes": 0.9871781286594592, "yes": 0.010355204866740223}, "ground_truth": 1}, {"key": "34652757", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9609739957032222, "res": {"Yes": 0.9609739957032222, "yes": 0.03393198157617257}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7257505487233789, "res": {"Yes": 0.7257505487233789, "yes": 0.26804851198046786}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.29612515294954594, "res": {"yes": 0.5970266062498096, "Yes": 0.29612515294954594}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5178619696959159, "res": {"Yes": 0.5178619696959159, "yes": 0.3707146165537494}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2684620273968092, "res": {"yes": 0.7111504273770283, "Yes": 0.2684620273968092}, "ground_truth": 1}, {"key": "31361004", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.28058754481108245, "res": {"yes": 0.6508593580339183, "Yes": 0.28058754481108245}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.2722711048672628, "res": {"yes": 0.6536261023223947, "Yes": 0.2722711048672628}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.47171821678873116, "res": {"Yes": 0.47171821678873116, "yes": 0.4480514990218452}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6893517884274873, "res": {"Yes": 0.6893517884274873, "yes": 0.2613279884156837}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5228577502303329, "res": {"Yes": 0.5228577502303329, "yes": 0.4395888591177694}, "ground_truth": 1}, {"key": "26150727", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7407669578345617, "res": {"Yes": 0.7407669578345617, "yes": 0.22031128519483745}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5417233674476036, "res": {"Yes": 0.5417233674476036, "yes": 0.20325807539556354}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8237639652883924, "res": {"Yes": 0.8237639652883924, "yes": 0.17147515357252183}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.986928194955587, "res": {"Yes": 0.986928194955587, "yes": 0.010176481055153577}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8207206280173835, "res": {"Yes": 0.8207206280173835, "yes": 0.15320648381791102}, "ground_truth": 1}, {"key": "36997402", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9895610935560886, "res": {"Yes": 0.9895610935560886, "yes": 0.009122151432775099}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8748804067200893, "res": {"Yes": 0.8748804067200893, "yes": 0.11988894776156771}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5361238399045429, "res": {"Yes": 0.5361238399045429, "yes": 0.4471040443696307}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7553976875003755, "res": {"Yes": 0.7553976875003755, "yes": 0.23442586073433153}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9718237044447413, "res": {"Yes": 0.9718237044447413, "yes": 0.02235390394548678}, "ground_truth": 1}, {"key": "37430643", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7393663820796879, "res": {"Yes": 0.7393663820796879, "yes": 0.2507168159700942}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5546893453808777, "res": {"Yes": 0.5546893453808777, "yes": 0.4349482154162451}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.77856228849249, "res": {"Yes": 0.77856228849249, "yes": 0.1639303723745935}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9689768926210592, "res": {"Yes": 0.9689768926210592, "yes": 0.016319933568696796}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9856745084625287, "res": {"Yes": 0.9856745084625287, "yes": 0.009825924784573677}, "ground_truth": 1}, {"key": "36964631", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8248357445377503, "res": {"Yes": 0.8248357445377503, "yes": 0.14002035137126148}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7216782114148638, "res": {"Yes": 0.7216782114148638, "yes": 0.2085923312572321}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.914823631266514, "res": {"Yes": 0.914823631266514, "yes": 0.07880614523052824}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8439583364503177, "res": {"Yes": 0.8439583364503177, "yes": 0.14638061462295998}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7191825062659045, "res": {"Yes": 0.7191825062659045, "yes": 0.2726763549710684}, "ground_truth": 1}, {"key": "35502013", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8058383942426796, "res": {"Yes": 0.8058383942426796, "yes": 0.18823354998465167}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7766967561631349, "res": {"Yes": 0.7766967561631349, "yes": 0.21417967787553727}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7414591722092299, "res": {"Yes": 0.7414591722092299, "yes": 0.2046282828847218}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9791557031925888, "res": {"Yes": 0.9791557031925888, "yes": 0.017632314663684857}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.794142462520648, "res": {"Yes": 0.794142462520648, "yes": 0.19433146410824204}, "ground_truth": 1}, {"key": "33987664", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6815079789434502, "res": {"Yes": 0.6815079789434502, "yes": 0.24070354654640944}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9774329068225174, "res": {"Yes": 0.9774329068225174, "yes": 0.017214245692955415}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7218999233609501, "res": {"Yes": 0.7218999233609501, "yes": 0.27204039845039696}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6578173630095886, "res": {"Yes": 0.6578173630095886, "yes": 0.3372958299570492}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6039480467870951, "res": {"Yes": 0.6039480467870951, "yes": 0.39189801847104}, "ground_truth": 1}, {"key": "35203721", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7264095534541087, "res": {"Yes": 0.7264095534541087, "yes": 0.26982655912640346}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7393175115770203, "res": {"Yes": 0.7393175115770203, "yes": 0.25644332344882403}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7676534842770122, "res": {"Yes": 0.7676534842770122, "yes": 0.2267563468183508}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8716337018601659, "res": {"Yes": 0.8716337018601659, "yes": 0.12430714502720593}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8065751139853902, "res": {"Yes": 0.8065751139853902, "yes": 0.18812088828692033}, "ground_truth": 1}, {"key": "39028348", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.750837821954372, "res": {"Yes": 0.750837821954372, "yes": 0.24514076068754379}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7934790748998457, "res": {"Yes": 0.7934790748998457, "yes": 0.20411098274633313}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8660360007317394, "res": {"Yes": 0.8660360007317394, "yes": 0.12471161338987703}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9708255363849585, "res": {"Yes": 0.9708255363849585, "yes": 0.024415663644573135}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9824479350091152, "res": {"Yes": 0.9824479350091152, "yes": 0.011779965846986956}, "ground_truth": 1}, {"key": "37459383", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9821217378636291, "res": {"Yes": 0.9821217378636291, "yes": 0.01239063541688907}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8083856861538641, "res": {"Yes": 0.8083856861538641, "yes": 0.18691399270267584}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8765965733750429, "res": {"Yes": 0.8765965733750429, "yes": 0.1115365989764844}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8340664362699168, "res": {"Yes": 0.8340664362699168, "yes": 0.1561807950749852}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8387529781517632, "res": {"Yes": 0.8387529781517632, "yes": 0.15339100949698276}, "ground_truth": 1}, {"key": "34020070", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8487276018757532, "res": {"Yes": 0.8487276018757532, "yes": 0.13591029587841455}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8954543821365216, "res": {"Yes": 0.8954543821365216, "yes": 0.09155575024030935}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9186304641200213, "res": {"Yes": 0.9186304641200213, "yes": 0.07054109719998328}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8382190302823214, "res": {"Yes": 0.8382190302823214, "yes": 0.15044169176447966}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7926502827326778, "res": {"Yes": 0.7926502827326778, "yes": 0.1932429062153994}, "ground_truth": 1}, {"key": "35176615", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7541064353460821, "res": {"Yes": 0.7541064353460821, "yes": 0.22867218672933068}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9219180059684571, "res": {"Yes": 0.9219180059684571, "yes": 0.07172037511416707}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7797709548886858, "res": {"Yes": 0.7797709548886858, "yes": 0.21605909948395466}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6276360805832503, "res": {"Yes": 0.6276360805832503, "yes": 0.3681545117675286}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6551009716740657, "res": {"Yes": 0.6551009716740657, "yes": 0.34083468976179543}, "ground_truth": 1}, {"key": "33296389", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6949117221319914, "res": {"Yes": 0.6949117221319914, "yes": 0.3008923257475178}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7403899737804674, "res": {"Yes": 0.7403899737804674, "yes": 0.25203812833479305}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.620715483903789, "res": {"Yes": 0.620715483903789, "yes": 0.3744655535855945}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9781521973918096, "res": {"Yes": 0.9781521973918096, "yes": 0.018178409142041208}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8726388126213002, "res": {"Yes": 0.8726388126213002, "yes": 0.12453015295395185}, "ground_truth": 1}, {"key": "35399504", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8364312496818563, "res": {"Yes": 0.8364312496818563, "yes": 0.16033448734378553}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6066061168200365, "res": {"Yes": 0.6066061168200365, "yes": 0.3887258265228452}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8566875608167079, "res": {"Yes": 0.8566875608167079, "yes": 0.13077403092493786}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8362597235866037, "res": {"Yes": 0.8362597235866037, "yes": 0.1468075549336901}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7850979986083017, "res": {"Yes": 0.7850979986083017, "yes": 0.19928592030380196}, "ground_truth": 1}, {"key": "34807886", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8291531585053764, "res": {"Yes": 0.8291531585053764, "yes": 0.15890556246168894}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8654503255042264, "res": {"Yes": 0.8654503255042264, "yes": 0.12271653214908942}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7086206110296228, "res": {"Yes": 0.7086206110296228, "yes": 0.28172791900833777}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6943855397121997, "res": {"Yes": 0.6943855397121997, "yes": 0.2978718257308106}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7112561833083378, "res": {"Yes": 0.7112561833083378, "yes": 0.2785244866891357}, "ground_truth": 1}, {"key": "37629813", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5148480862440042, "res": {"Yes": 0.5148480862440042, "yes": 0.4783873925779166}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5501554248722648, "res": {"Yes": 0.5501554248722648, "yes": 0.44167040725905643}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9189181691071747, "res": {"Yes": 0.9189181691071747, "yes": 0.0784532321911352}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9127167933410018, "res": {"Yes": 0.9127167933410018, "yes": 0.08363001434996445}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9504686853938449, "res": {"Yes": 0.9504686853938449, "yes": 0.045950938776380915}, "ground_truth": 1}, {"key": "28084389", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9275464081602521, "res": {"Yes": 0.9275464081602521, "yes": 0.06909600255683897}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8256273643502778, "res": {"Yes": 0.8256273643502778, "yes": 0.16935740070900693}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9403795016815845, "res": {"Yes": 0.9403795016815845, "yes": 0.047936330332302955}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7659900108441573, "res": {"Yes": 0.7659900108441573, "yes": 0.22357387111675714}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8208149670679293, "res": {"Yes": 0.8208149670679293, "yes": 0.1618654571880679}, "ground_truth": 1}, {"key": "35391734", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8805542854192533, "res": {"Yes": 0.8805542854192533, "yes": 0.1066128745662892}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9042188474512085, "res": {"Yes": 0.9042188474512085, "yes": 0.08615720382027943}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8126212135481009, "res": {"Yes": 0.8126212135481009, "yes": 0.17771509515819728}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8997340134047167, "res": {"Yes": 0.8997340134047167, "yes": 0.09600045000242757}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8317281369763396, "res": {"Yes": 0.8317281369763396, "yes": 0.15660635737266226}, "ground_truth": 1}, {"key": "40214591", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8114319926801267, "res": {"Yes": 0.8114319926801267, "yes": 0.17873718362117286}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8289109650805667, "res": {"Yes": 0.8289109650805667, "yes": 0.15902950944319175}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9696435250284905, "res": {"Yes": 0.9696435250284905, "yes": 0.027933628315421913}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9793065929198943, "res": {"Yes": 0.9793065929198943, "yes": 0.019375058197602726}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9288247849857325, "res": {"Yes": 0.9288247849857325, "yes": 0.0692976211649188}, "ground_truth": 1}, {"key": "26283171", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9889808000958419, "res": {"Yes": 0.9889808000958419, "yes": 0.009258603978042226}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9903078317738363, "res": {"Yes": 0.9903078317738363, "yes": 0.006081669693888983}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.643209282161675, "res": {"Yes": 0.643209282161675, "yes": 0.34740257951541764}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7674948420495343, "res": {"Yes": 0.7674948420495343, "yes": 0.2228407696526999}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8497477655053614, "res": {"Yes": 0.8497477655053614, "yes": 0.1407641899989478}, "ground_truth": 1}, {"key": "37084030", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8227845078119541, "res": {"Yes": 0.8227845078119541, "yes": 0.16703164391405598}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8255354688828892, "res": {"Yes": 0.8255354688828892, "yes": 0.16399946719076747}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7568518132433931, "res": {"Yes": 0.7568518132433931, "yes": 0.23760023557900736}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7888775078989334, "res": {"Yes": 0.7888775078989334, "yes": 0.19284434848394671}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.795821193223489, "res": {"Yes": 0.795821193223489, "yes": 0.19971151075223378}, "ground_truth": 1}, {"key": "39027295", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8118515871269081, "res": {"Yes": 0.8118515871269081, "yes": 0.18353490712772158}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7703923102894071, "res": {"Yes": 0.7703923102894071, "yes": 0.22479641693294813}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8900559705276668, "res": {"Yes": 0.8900559705276668, "yes": 0.10627355517327622}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8735348965049532, "res": {"Yes": 0.8735348965049532, "yes": 0.12177777641513264}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.914696840924193, "res": {"Yes": 0.914696840924193, "yes": 0.08217509527636783}, "ground_truth": 1}, {"key": "14018647", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8701811244315926, "res": {"Yes": 0.8701811244315926, "yes": 0.12669969486306148}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7796660905447237, "res": {"Yes": 0.7796660905447237, "yes": 0.2157070374539215}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8917212725074259, "res": {"Yes": 0.8917212725074259, "yes": 0.09379458067113992}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.3983587092833037, "res": {"yes": 0.5681523354524843, "Yes": 0.3983587092833037}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9013484672863555, "res": {"Yes": 0.9013484672863555, "yes": 0.09529698554876108}, "ground_truth": 1}, {"key": "37424289", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9876346609349929, "res": {"Yes": 0.9876346609349929, "yes": 0.009538936287505847}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7517765801799691, "res": {"Yes": 0.7517765801799691, "yes": 0.22530467086526623}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9251067987744026, "res": {"Yes": 0.9251067987744026, "yes": 0.06815396105032709}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9339561457631564, "res": {"Yes": 0.9339561457631564, "yes": 0.05825797418650799}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9421584505709569, "res": {"Yes": 0.9421584505709569, "yes": 0.04473191723765461}, "ground_truth": 1}, {"key": "37498031", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9412047738998555, "res": {"Yes": 0.9412047738998555, "yes": 0.05352187661333882}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9207971469741866, "res": {"Yes": 0.9207971469741866, "yes": 0.07241594275742025}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8893058450115278, "res": {"Yes": 0.8893058450115278, "yes": 0.10448452548768214}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9092116351102556, "res": {"Yes": 0.9092116351102556, "yes": 0.08487073030953894}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7179318003717443, "res": {"Yes": 0.7179318003717443, "yes": 0.27463468022877546}, "ground_truth": 1}, {"key": "30104095", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9279542471977319, "res": {"Yes": 0.9279542471977319, "yes": 0.06799681589336043}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8454320419853283, "res": {"Yes": 0.8454320419853283, "yes": 0.14896143977289353}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9587329270202841, "res": {"Yes": 0.9587329270202841, "yes": 0.03836248691978161}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7204347426178506, "res": {"Yes": 0.7204347426178506, "yes": 0.27477442243102346}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9739382701670009, "res": {"Yes": 0.9739382701670009, "yes": 0.016539898930763023}, "ground_truth": 1}, {"key": "37911407", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.761581611774582, "res": {"Yes": 0.761581611774582, "yes": 0.2369161021231483}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9779151386584132, "res": {"Yes": 0.9779151386584132, "yes": 0.020674266837646255}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9799779221398626, "res": {"Yes": 0.9799779221398626, "yes": 0.015748036313365243}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9763205374192334, "res": {"Yes": 0.9763205374192334, "yes": 0.02269082321245294}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9813917811296794, "res": {"Yes": 0.9813917811296794, "yes": 0.017653291954347942}, "ground_truth": 1}, {"key": "39177472", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.977870561412852, "res": {"Yes": 0.977870561412852, "yes": 0.020701005603739112}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9876159345717227, "res": {"Yes": 0.9876159345717227, "yes": 0.010738434113705024}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9829226712525099, "res": {"Yes": 0.9829226712525099, "yes": 0.014347285764467742}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9043056973210931, "res": {"Yes": 0.9043056973210931, "yes": 0.09221322689295185}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9729890108016936, "res": {"Yes": 0.9729890108016936, "yes": 0.02208621104062736}, "ground_truth": 1}, {"key": "32325454", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9799225147745755, "res": {"Yes": 0.9799225147745755, "yes": 0.016614898269583695}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9874760753440963, "res": {"Yes": 0.9874760753440963, "yes": 0.0097192871662949}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6414932931053756, "res": {"Yes": 0.6414932931053756, "yes": 0.35076454239630983}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.647358365419848, "res": {"Yes": 0.647358365419848, "yes": 0.34803403578857606}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8699618403170505, "res": {"Yes": 0.8699618403170505, "yes": 0.12753505640245003}, "ground_truth": 1}, {"key": "38395319", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.694874587041819, "res": {"Yes": 0.694874587041819, "yes": 0.3016194667512712}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7320396498690526, "res": {"Yes": 0.7320396498690526, "yes": 0.26590581815060044}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7924500449348504, "res": {"Yes": 0.7924500449348504, "yes": 0.19389215045352035}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7661712272137435, "res": {"Yes": 0.7661712272137435, "yes": 0.2263129054293119}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6123607434779069, "res": {"Yes": 0.6123607434779069, "yes": 0.36819954349972234}, "ground_truth": 1}, {"key": "38235895", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7836262611576899, "res": {"Yes": 0.7836262611576899, "yes": 0.20882485347593677}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6671046825692278, "res": {"Yes": 0.6671046825692278, "yes": 0.32474591578503903}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8767675088388764, "res": {"Yes": 0.8767675088388764, "yes": 0.11277136821715765}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8393425518205277, "res": {"Yes": 0.8393425518205277, "yes": 0.15510766349414523}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.862833066983878, "res": {"Yes": 0.862833066983878, "yes": 0.12603117273133502}, "ground_truth": 1}, {"key": "26543267", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8376885804026393, "res": {"Yes": 0.8376885804026393, "yes": 0.15456995530113077}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8561556973095048, "res": {"Yes": 0.8561556973095048, "yes": 0.13488738956660828}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.936809524211691, "res": {"Yes": 0.936809524211691, "yes": 0.05489330977029508}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9327312583314212, "res": {"Yes": 0.9327312583314212, "yes": 0.058644880306185525}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.929747262984555, "res": {"Yes": 0.929747262984555, "yes": 0.06260394397907865}, "ground_truth": 1}, {"key": "39054728", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.984148906140657, "res": {"Yes": 0.984148906140657, " Yes": 0.0076768114025045865}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9908185215610714, "res": {"Yes": 0.9908185215610714, " Yes": 0.00485445713084774}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6849936779723135, "res": {"Yes": 0.6849936779723135, "yes": 0.2997142374846287}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9314609886099949, "res": {"Yes": 0.9314609886099949, "yes": 0.06212982074940158}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9297069723301905, "res": {"Yes": 0.9297069723301905, "yes": 0.06329765476781189}, "ground_truth": 1}, {"key": "39158443", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8806373079909051, "res": {"Yes": 0.8806373079909051, "yes": 0.1066060515606555}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8320695149737761, "res": {"Yes": 0.8320695149737761, "yes": 0.15686214761595982}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7813021798139761, "res": {"Yes": 0.7813021798139761, "yes": 0.20911093047537196}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8388714100452512, "res": {"Yes": 0.8388714100452512, "yes": 0.15414608843942287}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.851225264326943, "res": {"Yes": 0.851225264326943, "yes": 0.14164942670191003}, "ground_truth": 1}, {"key": "36254201", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8508595288626352, "res": {"Yes": 0.8508595288626352, "yes": 0.1404390348549632}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9104092821307267, "res": {"Yes": 0.9104092821307267, "yes": 0.08121038872784801}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6746274610657207, "res": {"Yes": 0.6746274610657207, "yes": 0.2939434138694363}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5443254812073293, "res": {"Yes": 0.5443254812073293, "yes": 0.42387301141050343}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3785841143556194, "res": {"yes": 0.5853016052957096, "Yes": 0.3785841143556194}, "ground_truth": 1}, {"key": "23434347", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6790402329084332, "res": {"Yes": 0.6790402329084332, "yes": 0.28729730312666524}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6468068634117917, "res": {"Yes": 0.6468068634117917, "yes": 0.3393532427872084}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8597292627961726, "res": {"Yes": 0.8597292627961726, "yes": 0.10453428241396327}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7818599863934996, "res": {"Yes": 0.7818599863934996, "yes": 0.21567876011493137}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.742216165556001, "res": {"Yes": 0.742216165556001, "yes": 0.2202340575970371}, "ground_truth": 1}, {"key": "34397620", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7421670695805168, "res": {"Yes": 0.7421670695805168, "yes": 0.18013482152257915}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8299158075693279, "res": {"Yes": 0.8299158075693279, "yes": 0.08946864949835556}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7809037548210453, "res": {"Yes": 0.7809037548210453, "yes": 0.20547925480410448}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7860177768116278, "res": {"Yes": 0.7860177768116278, "yes": 0.20662521134461914}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7319995864353611, "res": {"Yes": 0.7319995864353611, "yes": 0.2592871614510923}, "ground_truth": 1}, {"key": "34340916", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8839944275860957, "res": {"Yes": 0.8839944275860957, "yes": 0.09156080425721218}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8557633511492533, "res": {"Yes": 0.8557633511492533, "yes": 0.13622907979727164}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7786464711979568, "res": {"Yes": 0.7786464711979568, "yes": 0.2177339226411413}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7902688539196419, "res": {"Yes": 0.7902688539196419, "yes": 0.20208598577503156}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8049851491456648, "res": {"Yes": 0.8049851491456648, "yes": 0.18673006642942377}, "ground_truth": 1}, {"key": "30375089", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8354117432327212, "res": {"Yes": 0.8354117432327212, "yes": 0.15960999030970532}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5848095501816128, "res": {"Yes": 0.5848095501816128, "yes": 0.40669486181270315}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5571246365960703, "res": {"Yes": 0.5571246365960703, "yes": 0.36131847765382963}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6600550885136203, "res": {"Yes": 0.6600550885136203, "yes": 0.292040817653058}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8337853365347899, "res": {"Yes": 0.8337853365347899, "yes": 0.13899974227358983}, "ground_truth": 1}, {"key": "35807797", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6862001499743703, "res": {"Yes": 0.6862001499743703, "yes": 0.2144395925692763}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9905490773643221, "res": {"Yes": 0.9905490773643221, "yes": 0.008039438793749255}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8251319714884644, "res": {"Yes": 0.8251319714884644, "yes": 0.166971189401886}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8267392086020837, "res": {"Yes": 0.8267392086020837, "yes": 0.16448595374349093}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8548664765659105, "res": {"Yes": 0.8548664765659105, "yes": 0.13935906153591018}, "ground_truth": 1}, {"key": "34188172", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8254457132910087, "res": {"Yes": 0.8254457132910087, "yes": 0.16837717950247064}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.886847650143754, "res": {"Yes": 0.886847650143754, "yes": 0.09607951971749651}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7458700721244952, "res": {"Yes": 0.7458700721244952, "yes": 0.24037323927716742}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7161178002969046, "res": {"Yes": 0.7161178002969046, "yes": 0.2719930675389753}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9778752376010574, "res": {"Yes": 0.9778752376010574, "yes": 0.015006652142796412}, "ground_truth": 1}, {"key": "37075567", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9702776755467641, "res": {"Yes": 0.9702776755467641, "yes": 0.018842543756983295}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6433274312604199, "res": {"Yes": 0.6433274312604199, "yes": 0.3513681657367824}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.1841734688515268, "res": {"yes": 0.6335314725682274, "Yes": 0.1841734688515268}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.3734149915172806, "res": {"yes": 0.5651840775069653, "Yes": 0.3734149915172806}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4852797598067762, "res": {"Yes": 0.4852797598067762, "yes": 0.41128559395392694}, "ground_truth": 1}, {"key": "35559735", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.29757981626898256, "res": {"yes": 0.6071160439500486, "Yes": 0.29757981626898256}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.16718501592849155, "res": {"yes": 0.6644654806723952, "Yes": 0.16718501592849155}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9865633881333877, "res": {"Yes": 0.9865633881333877, "yes": 0.00892615453516789}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5374816280159099, "res": {"Yes": 0.5374816280159099, "yes": 0.45518045694012343}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9769489989409664, "res": {"Yes": 0.9769489989409664, "yes": 0.013189045240172802}, "ground_truth": 1}, {"key": "33005019", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9719658032326575, "res": {"Yes": 0.9719658032326575, "yes": 0.016198234891417272}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9825364241497013, "res": {"Yes": 0.9825364241497013, "yes": 0.01367759754863748}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.955902542613664, "res": {"Yes": 0.955902542613664, "yes": 0.036587998579947274}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8742350618516656, "res": {"Yes": 0.8742350618516656, "yes": 0.11610910146803827}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9283767351276976, "res": {"Yes": 0.9283767351276976, "yes": 0.061340449028602084}, "ground_truth": 1}, {"key": "30808252", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9192277073554074, "res": {"Yes": 0.9192277073554074, "yes": 0.07157037840816818}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9247404827712628, "res": {"Yes": 0.9247404827712628, "yes": 0.06418738390374754}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5931295134718163, "res": {"Yes": 0.5931295134718163, "yes": 0.4016355072684008}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9342447968179953, "res": {"Yes": 0.9342447968179953, "yes": 0.05487534612991735}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.974132914620888, "res": {"Yes": 0.974132914620888, "yes": 0.018781587074751034}, "ground_truth": 1}, {"key": "15159017", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6501582397183608, "res": {"Yes": 0.6501582397183608, "yes": 0.3441694347466477}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.699842758420411, "res": {"Yes": 0.699842758420411, "yes": 0.29691677449622006}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7491085046017758, "res": {"Yes": 0.7491085046017758, "yes": 0.2412640746371283}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6236874609182492, "res": {"Yes": 0.6236874609182492, "yes": 0.3614920967504625}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7899427535558944, "res": {"Yes": 0.7899427535558944, "yes": 0.2004935103682484}, "ground_truth": 1}, {"key": "24493400", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7228560121330346, "res": {"Yes": 0.7228560121330346, "yes": 0.267974246027821}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7690107195321353, "res": {"Yes": 0.7690107195321353, "yes": 0.22345876021673297}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7886967573282859, "res": {"Yes": 0.7886967573282859, "yes": 0.18404508945637452}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7998577177055253, "res": {"Yes": 0.7998577177055253, "yes": 0.18598746500242322}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6871125788171224, "res": {"Yes": 0.6871125788171224, "yes": 0.2654823815532896}, "ground_truth": 1}, {"key": "37791071", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6699632090302419, "res": {"Yes": 0.6699632090302419, "yes": 0.32135713635166074}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7957400474737023, "res": {"Yes": 0.7957400474737023, "yes": 0.18722608157324863}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7464744716316709, "res": {"Yes": 0.7464744716316709, "yes": 0.24810103911553116}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7766854009395667, "res": {"Yes": 0.7766854009395667, "yes": 0.21822592491822898}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7916113716165277, "res": {"Yes": 0.7916113716165277, "yes": 0.20124532354438152}, "ground_truth": 1}, {"key": "33528627", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.967857145073099, "res": {"Yes": 0.967857145073099, "yes": 0.02625712232710742}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.789797204215297, "res": {"Yes": 0.789797204215297, "yes": 0.198002543977288}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.729069146722793, "res": {"Yes": 0.729069146722793, "yes": 0.26371362985098834}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8692031817992241, "res": {"Yes": 0.8692031817992241, "yes": 0.11764068999548528}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9823718925521868, "res": {"Yes": 0.9823718925521868, "yes": 0.011336980341054506}, "ground_truth": 1}, {"key": "39925662", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9636213856447405, "res": {"Yes": 0.9636213856447405, "yes": 0.02700212263383301}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8805720375725884, "res": {"Yes": 0.8805720375725884, "yes": 0.1091759366829959}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9314122707485273, "res": {"Yes": 0.9314122707485273, "yes": 0.06200275248519319}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7400326959823964, "res": {"Yes": 0.7400326959823964, "yes": 0.25137963310749906}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.783616755828792, "res": {"Yes": 0.783616755828792, "yes": 0.20471907612746185}, "ground_truth": 1}, {"key": "29213416", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7701440528566352, "res": {"Yes": 0.7701440528566352, "yes": 0.22085440400042083}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8688808628072668, "res": {"Yes": 0.8688808628072668, "yes": 0.12636145353353725}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.807037817480806, "res": {"Yes": 0.807037817480806, "yes": 0.18655355928016187}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5581096670386049, "res": {"Yes": 0.5581096670386049, "yes": 0.43401918100251974}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9730851702190481, "res": {"Yes": 0.9730851702190481, "yes": 0.022978074507456852}, "ground_truth": 1}, {"key": "34492745", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9673274560332352, "res": {"Yes": 0.9673274560332352, "yes": 0.028469592246816663}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8028205295122209, "res": {"Yes": 0.8028205295122209, "yes": 0.19198615863417537}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5594352423111566, "res": {"Yes": 0.5594352423111566, "yes": 0.3374584119884996}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5895910311361101, "res": {"Yes": 0.5895910311361101, "yes": 0.3634877437167425}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.967807904657742, "res": {"Yes": 0.967807904657742, "yes": 0.02688357429185443}, "ground_truth": 1}, {"key": "34191937", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7264612030094793, "res": {"Yes": 0.7264612030094793, "yes": 0.20357939556331536}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7797717580531829, "res": {"Yes": 0.7797717580531829, "yes": 0.21285717033656448}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6414797384952973, "res": {"Yes": 0.6414797384952973, "yes": 0.2654652319453796}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9800754798777606, "res": {"Yes": 0.9800754798777606, "yes": 0.017637552238881384}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7419327821593681, "res": {"Yes": 0.7419327821593681, "yes": 0.1844919034876939}, "ground_truth": 1}, {"key": "34933372", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6617905584940919, "res": {"Yes": 0.6617905584940919, "yes": 0.3049651412164673}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9805140059010266, "res": {"Yes": 0.9805140059010266, "yes": 0.017417375135435127}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9199057222548511, "res": {"Yes": 0.9199057222548511, "yes": 0.07086823296078686}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9786022597144026, "res": {"Yes": 0.9786022597144026, "yes": 0.017734364934438133}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9637661902730468, "res": {"Yes": 0.9637661902730468, "yes": 0.03076715051142281}, "ground_truth": 1}, {"key": "38714379", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9633539022067519, "res": {"Yes": 0.9633539022067519, "yes": 0.03078707253501618}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9827641012666015, "res": {"Yes": 0.9827641012666015, "yes": 0.01436398136442136}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8807249533769584, "res": {"Yes": 0.8807249533769584, "yes": 0.11732466178790425}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7767172456938182, "res": {"Yes": 0.7767172456938182, "yes": 0.2210363746127676}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8642526793350667, "res": {"Yes": 0.8642526793350667, "yes": 0.13298326310431585}, "ground_truth": 1}, {"key": "39220660", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9163983123618767, "res": {"Yes": 0.9163983123618767, "yes": 0.08149181444801906}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8706036128790425, "res": {"Yes": 0.8706036128790425, "yes": 0.12687453426633938}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8049131464444426, "res": {"Yes": 0.8049131464444426, "yes": 0.17689962458611905}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8344975765155854, "res": {"Yes": 0.8344975765155854, "yes": 0.16049971694438628}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.966697743252676, "res": {"Yes": 0.966697743252676, "yes": 0.026624019018683845}, "ground_truth": 1}, {"key": "41028780", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9703384022753595, "res": {"Yes": 0.9703384022753595, "yes": 0.021662288227144336}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9570773120031154, "res": {"Yes": 0.9570773120031154, "yes": 0.03894522917681953}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9596680601402137, "res": {"Yes": 0.9596680601402137, "yes": 0.02317174802319711}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9168942216468158, "res": {"Yes": 0.9168942216468158, "yes": 0.06631383704641784}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41293922574871605, "res": {"yes": 0.5113070083840501, "Yes": 0.41293922574871605}, "ground_truth": 1}, {"key": "39457108", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3536778176116136, "res": {"yes": 0.5617177946254148, "Yes": 0.3536778176116136}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8730271534657098, "res": {"Yes": 0.8730271534657098, "yes": 0.11425029124736501}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9124753478000194, "res": {"Yes": 0.9124753478000194, "yes": 0.07435813478673801}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8372886544707544, "res": {"Yes": 0.8372886544707544, "yes": 0.1521290660018122}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9352582730279082, "res": {"Yes": 0.9352582730279082, "yes": 0.05608343984271786}, "ground_truth": 1}, {"key": "38288018", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.901227222440496, "res": {"Yes": 0.901227222440496, "yes": 0.08677914076179731}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.872267156380053, "res": {"Yes": 0.872267156380053, "yes": 0.11765940811825072}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.753443158793946, "res": {"Yes": 0.753443158793946, "yes": 0.21916370872526086}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8542285739588743, "res": {"Yes": 0.8542285739588743, "yes": 0.12790627625780776}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7938071272710086, "res": {"Yes": 0.7938071272710086, "yes": 0.19546174430678825}, "ground_truth": 1}, {"key": "40106293", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8046644506119004, "res": {"Yes": 0.8046644506119004, "yes": 0.1829421127412416}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.737738885997333, "res": {"Yes": 0.737738885997333, "yes": 0.24855995374232842}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.964366162377526, "res": {"Yes": 0.964366162377526, "yes": 0.03283865340363654}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9587617460064702, "res": {"Yes": 0.9587617460064702, "yes": 0.039190645338053}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7079810862368384, "res": {"Yes": 0.7079810862368384, "yes": 0.2900675941167704}, "ground_truth": 1}, {"key": "39948797", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7015164920657533, "res": {"Yes": 0.7015164920657533, "yes": 0.2718945151100779}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7411500333682549, "res": {"Yes": 0.7411500333682549, "yes": 0.10733491351765846}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7453127425734528, "res": {"Yes": 0.7453127425734528, "yes": 0.23579912539085876}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8427265928325492, "res": {"Yes": 0.8427265928325492, "yes": 0.1333533375727879}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.957724385703194, "res": {"Yes": 0.957724385703194, "yes": 0.03781782222643738}, "ground_truth": 1}, {"key": "31853399", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6033130878518799, "res": {"Yes": 0.6033130878518799, "yes": 0.36786667597631106}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9569742498968645, "res": {"Yes": 0.9569742498968645, "yes": 0.03598729570112019}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9360707516421551, "res": {"Yes": 0.9360707516421551, "yes": 0.051373397441759915}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9430109307051555, "res": {"Yes": 0.9430109307051555, "yes": 0.047386210989376086}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.962701067825715, "res": {"Yes": 0.962701067825715, "yes": 0.03287381285535327}, "ground_truth": 1}, {"key": "35273252", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9549759844408103, "res": {"Yes": 0.9549759844408103, "yes": 0.03511721194143172}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.971834279887834, "res": {"Yes": 0.971834279887834, "yes": 0.022408032290496448}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.826363268308778, "res": {"Yes": 0.826363268308778, "yes": 0.13205100889741353}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9060919300865464, "res": {"Yes": 0.9060919300865464, "yes": 0.07812866968974394}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9298904132434941, "res": {"Yes": 0.9298904132434941, "yes": 0.05622082719926639}, "ground_truth": 1}, {"key": "37130459", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9214863820559108, "res": {"Yes": 0.9214863820559108, "yes": 0.06944843112706557}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9259370844525403, "res": {"Yes": 0.9259370844525403, "yes": 0.06250150765817031}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7849468111438138, "res": {"Yes": 0.7849468111438138, "yes": 0.17637494158622508}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7626800250051454, "res": {"Yes": 0.7626800250051454, "yes": 0.20907998434773153}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8657581343651476, "res": {"Yes": 0.8657581343651476, "yes": 0.07324480903591453}, "ground_truth": 1}, {"key": "21734003", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7831527883772285, "res": {"Yes": 0.7831527883772285, "yes": 0.18509986291548783}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8172083731056967, "res": {"Yes": 0.8172083731056967, "yes": 0.1495517338787204}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7597757690184858, "res": {"Yes": 0.7597757690184858, "yes": 0.23472334820785026}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7876816564709209, "res": {"Yes": 0.7876816564709209, "yes": 0.2053918419795826}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6762872142644218, "res": {"Yes": 0.6762872142644218, "yes": 0.3142368431030086}, "ground_truth": 1}, {"key": "33990737", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5720229877704466, "res": {"Yes": 0.5720229877704466, "yes": 0.42135553521177377}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7531242937973662, "res": {"Yes": 0.7531242937973662, "yes": 0.23980974480401127}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8902991136199973, "res": {"Yes": 0.8902991136199973, "yes": 0.10043919092819877}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7702191455624986, "res": {"Yes": 0.7702191455624986, "yes": 0.22039993425543422}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8676336548822555, "res": {"Yes": 0.8676336548822555, "yes": 0.125462295476496}, "ground_truth": 1}, {"key": "34559912", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8927052408757059, "res": {"Yes": 0.8927052408757059, "yes": 0.09878391412339067}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8431449502767419, "res": {"Yes": 0.8431449502767419, "yes": 0.1494428848925654}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7542749366284445, "res": {"Yes": 0.7542749366284445, "yes": 0.23642006966350526}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7121377479930325, "res": {"Yes": 0.7121377479930325, "yes": 0.28080747129739236}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5130127399034211, "res": {"Yes": 0.5130127399034211, "yes": 0.4788841686099997}, "ground_truth": 1}, {"key": "39820439", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7406764416429662, "res": {"Yes": 0.7406764416429662, "yes": 0.2434359524525431}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.754676770458543, "res": {"Yes": 0.754676770458543, "yes": 0.23822013400578715}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8545216417054894, "res": {"Yes": 0.8545216417054894, "yes": 0.14108545617900783}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.767410437608291, "res": {"Yes": 0.767410437608291, "yes": 0.22623769152079132}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8228752412621273, "res": {"Yes": 0.8228752412621273, "yes": 0.1676838865068377}, "ground_truth": 1}, {"key": "34759328", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9536700432249744, "res": {"Yes": 0.9536700432249744, "yes": 0.03575079758575278}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6564628334616887, "res": {"Yes": 0.6564628334616887, "yes": 0.32556237741816496}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6725451178612866, "res": {"Yes": 0.6725451178612866, "yes": 0.30422762235756695}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7762387178583546, "res": {"Yes": 0.7762387178583546, "yes": 0.17230553702670096}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5709133683577234, "res": {"Yes": 0.5709133683577234, "yes": 0.33478032785277995}, "ground_truth": 1}, {"key": "36939137", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6773670104739253, "res": {"Yes": 0.6773670104739253, "yes": 0.2676470617926338}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7237313058476422, "res": {"Yes": 0.7237313058476422, "yes": 0.22628124646843822}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9788480087857955, "res": {"Yes": 0.9788480087857955, "yes": 0.016086401358754204}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8033458744008788, "res": {"Yes": 0.8033458744008788, "yes": 0.19446662976013}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8463511454060346, "res": {"Yes": 0.8463511454060346, "yes": 0.15096482746426704}, "ground_truth": 1}, {"key": "35851522", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9725383595500857, "res": {"Yes": 0.9725383595500857, "yes": 0.019671936539572887}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9663882523212569, "res": {"Yes": 0.9663882523212569, "yes": 0.027864352368099334}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7549540896444245, "res": {"Yes": 0.7549540896444245, "yes": 0.2354940338068695}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6727467636598976, "res": {"Yes": 0.6727467636598976, "yes": 0.31677456801332904}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7694000682180402, "res": {"Yes": 0.7694000682180402, "yes": 0.21995381202770384}, "ground_truth": 1}, {"key": "22412782", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7690351744618478, "res": {"Yes": 0.7690351744618478, "yes": 0.22233160397239027}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7461262777351748, "res": {"Yes": 0.7461262777351748, "yes": 0.24399449876211676}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8473999413511754, "res": {"Yes": 0.8473999413511754, "yes": 0.14621572197492622}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8513155415530902, "res": {"Yes": 0.8513155415530902, "yes": 0.14168754969185374}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8852535392278321, "res": {"Yes": 0.8852535392278321, "yes": 0.10325138842664289}, "ground_truth": 1}, {"key": "38579227", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9423607084734439, "res": {"Yes": 0.9423607084734439, "yes": 0.04579990784417222}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.836835109242404, "res": {"Yes": 0.836835109242404, "yes": 0.14795167186455524}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9210321920240069, "res": {"Yes": 0.9210321920240069, "yes": 0.07491119003138633}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8869393513840773, "res": {"Yes": 0.8869393513840773, "yes": 0.10678750005386363}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8671335562044971, "res": {"Yes": 0.8671335562044971, "yes": 0.12636314678835936}, "ground_truth": 1}, {"key": "37206995", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9055832805444244, "res": {"Yes": 0.9055832805444244, "yes": 0.08872081054430159}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8564863742365698, "res": {"Yes": 0.8564863742365698, "yes": 0.1376139194232334}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9428565055075526, "res": {"Yes": 0.9428565055075526, "yes": 0.04962644112967367}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8319316108807867, "res": {"Yes": 0.8319316108807867, "yes": 0.1602908983228727}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8643395065204891, "res": {"Yes": 0.8643395065204891, "yes": 0.12578254453838128}, "ground_truth": 1}, {"key": "38700847", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8530014769626393, "res": {"Yes": 0.8530014769626393, "yes": 0.13669255487173063}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8543298231293672, "res": {"Yes": 0.8543298231293672, "yes": 0.13333400274064902}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9437517557719726, "res": {"Yes": 0.9437517557719726, "yes": 0.04275216677878258}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9547077540840286, "res": {"Yes": 0.9547077540840286, "yes": 0.041356904269483054}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9558937110708702, "res": {"Yes": 0.9558937110708702, "yes": 0.04010722846267786}, "ground_truth": 1}, {"key": "20246590", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9446301100961906, "res": {"Yes": 0.9446301100961906, "yes": 0.051989203861728596}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9282138137376812, "res": {"Yes": 0.9282138137376812, "yes": 0.06367166085063194}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4784807015698217, "res": {"Yes": 0.4784807015698217, "yes": 0.45787237083579974}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.973676452321434, "res": {"Yes": 0.973676452321434, "yes": 0.02100618417682672}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.388003352085719, "res": {"yes": 0.5496163662731458, "Yes": 0.388003352085719}, "ground_truth": 1}, {"key": "39141360", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5294217318867638, "res": {"Yes": 0.5294217318867638, "yes": 0.4220471252383345}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.622230030908181, "res": {"Yes": 0.622230030908181, "yes": 0.33706236715853494}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6984289873141376, "res": {"Yes": 0.6984289873141376, "yes": 0.296157639660688}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8917055248488057, "res": {"Yes": 0.8917055248488057, "yes": 0.10088753473876493}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.867370801709093, "res": {"Yes": 0.867370801709093, "yes": 0.12459938605105908}, "ground_truth": 1}, {"key": "37906226", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8654944472864753, "res": {"Yes": 0.8654944472864753, "yes": 0.12923353015240752}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7743446656915446, "res": {"Yes": 0.7743446656915446, "yes": 0.21970604174334085}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9784804684993004, "res": {"Yes": 0.9784804684993004, "yes": 0.01629069814519758}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8888369132141997, "res": {"Yes": 0.8888369132141997, "yes": 0.1075741394311399}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8556924712064109, "res": {"Yes": 0.8556924712064109, "yes": 0.1408463921255352}, "ground_truth": 1}, {"key": "16201033", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8977165332901882, "res": {"Yes": 0.8977165332901882, "yes": 0.0972464695471126}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9296118783973124, "res": {"Yes": 0.9296118783973124, "yes": 0.06638307165224894}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9773601328098751, "res": {"Yes": 0.9773601328098751, "yes": 0.01625933619681383}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8476784172968205, "res": {"Yes": 0.8476784172968205, "yes": 0.148561167974054}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.977834540258441, "res": {"Yes": 0.977834540258441, "yes": 0.016353189895930553}, "ground_truth": 1}, {"key": "36469022", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9759304884068439, "res": {"Yes": 0.9759304884068439, "yes": 0.017972418532751673}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.982119203992814, "res": {"Yes": 0.982119203992814, "yes": 0.013137739666696091}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9327867574923756, "res": {"Yes": 0.9327867574923756, "yes": 0.06191224498713419}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.941889240882205, "res": {"Yes": 0.941889240882205, "yes": 0.050575116116708486}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9186211069977697, "res": {"Yes": 0.9186211069977697, "yes": 0.0742753385189971}, "ground_truth": 1}, {"key": "31295270", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8996582643880432, "res": {"Yes": 0.8996582643880432, "yes": 0.08862963468844}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9214495325523032, "res": {"Yes": 0.9214495325523032, "yes": 0.07041525844691078}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5951734757787536, "res": {"Yes": 0.5951734757787536, "yes": 0.3996247728540664}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6693575749096282, "res": {"Yes": 0.6693575749096282, "yes": 0.32723713088241974}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5840211214158213, "res": {"Yes": 0.5840211214158213, "yes": 0.4106567136879312}, "ground_truth": 1}, {"key": "35360689", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9214683671732377, "res": {"Yes": 0.9214683671732377, "yes": 0.07339292776348032}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5789096259697061, "res": {"Yes": 0.5789096259697061, "yes": 0.4153722802610815}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8867305585105888, "res": {"Yes": 0.8867305585105888, "yes": 0.1019413735559741}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.847866147990322, "res": {"Yes": 0.847866147990322, "yes": 0.14301479393247235}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8195077859411544, "res": {"Yes": 0.8195077859411544, "yes": 0.17131256936772293}, "ground_truth": 1}, {"key": "29202793", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.873882527722743, "res": {"Yes": 0.873882527722743, "yes": 0.11839014438362977}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9350542499871444, "res": {"Yes": 0.9350542499871444, "yes": 0.053623638035279225}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5267572830527352, "res": {"Yes": 0.5267572830527352, "yes": 0.46730323806177976}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7182902311155911, "res": {"Yes": 0.7182902311155911, "yes": 0.2754324144735961}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.70911706730084, "res": {"Yes": 0.70911706730084, "yes": 0.2856282896573729}, "ground_truth": 1}, {"key": "35999008", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9640710247105325, "res": {"Yes": 0.9640710247105325, "yes": 0.030406599661597047}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9391198801794434, "res": {"Yes": 0.9391198801794434, "yes": 0.055657201646092454}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.975228057702481, "res": {"Yes": 0.975228057702481, "yes": 0.021279068265143374}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9858071329895716, "res": {"Yes": 0.9858071329895716, "yes": 0.01148110658861155}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9874860785274057, "res": {"Yes": 0.9874860785274057, "yes": 0.00935471298604359}, "ground_truth": 1}, {"key": "31797119", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9839363972106402, "res": {"Yes": 0.9839363972106402, "yes": 0.015111239111563615}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9830974150493604, "res": {"Yes": 0.9830974150493604, "yes": 0.013281568924592093}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8485888462564276, "res": {"Yes": 0.8485888462564276, "yes": 0.14239820292394645}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8223047011068858, "res": {"Yes": 0.8223047011068858, "yes": 0.17272051691297258}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9341304522528949, "res": {"Yes": 0.9341304522528949, "yes": 0.059346519694982544}, "ground_truth": 1}, {"key": "26711893", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9140902688378705, "res": {"Yes": 0.9140902688378705, "yes": 0.0701234793656292}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8844273460697473, "res": {"Yes": 0.8844273460697473, "yes": 0.11079422798766879}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.2894699462401388, "res": {"yes": 0.5399522873968488, "Yes": 0.2894699462401388}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.24852446191511077, "res": {"yes": 0.6798711527607588, "Yes": 0.24852446191511077}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40898030832624005, "res": {"yes": 0.49341569771489613, "Yes": 0.40898030832624005}, "ground_truth": 1}, {"key": "35348288", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3260383064730455, "res": {"yes": 0.6134494342428976, "Yes": 0.3260383064730455}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6818455861798809, "res": {"Yes": 0.6818455861798809, "yes": 0.3074720162320825}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6997439336042514, "res": {"Yes": 0.6997439336042514, "yes": 0.29505514733346566}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9082059726319066, "res": {"Yes": 0.9082059726319066, "yes": 0.08376361117412978}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8637718651580923, "res": {"Yes": 0.8637718651580923, "yes": 0.12822827564009415}, "ground_truth": 1}, {"key": "38124131", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9022915138918778, "res": {"Yes": 0.9022915138918778, "yes": 0.09315563406258015}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7961799825223923, "res": {"Yes": 0.7961799825223923, "yes": 0.19703714790011004}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8735217149628199, "res": {"Yes": 0.8735217149628199, "yes": 0.12164292136236031}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8675740765248211, "res": {"Yes": 0.8675740765248211, "yes": 0.12852477392993789}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9213286960556919, "res": {"Yes": 0.9213286960556919, "yes": 0.07602476017333559}, "ground_truth": 1}, {"key": "20285901", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7719558497775932, "res": {"Yes": 0.7719558497775932, "yes": 0.2247494618660783}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8913138445326856, "res": {"Yes": 0.8913138445326856, "yes": 0.1049369904398878}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6827716698923718, "res": {"Yes": 0.6827716698923718, "yes": 0.3126192132973904}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.882848228327823, "res": {"Yes": 0.882848228327823, "yes": 0.11273813939145201}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7270691711236134, "res": {"Yes": 0.7270691711236134, "yes": 0.26902918791670694}, "ground_truth": 1}, {"key": "35633632", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5851716827612689, "res": {"Yes": 0.5851716827612689, "yes": 0.4067185728143158}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8685835131568674, "res": {"Yes": 0.8685835131568674, "yes": 0.1278546253444314}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7841332275258746, "res": {"Yes": 0.7841332275258746, "yes": 0.21246546956208998}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8976863615445433, "res": {"Yes": 0.8976863615445433, "yes": 0.10060040454525311}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8372872310812517, "res": {"Yes": 0.8372872310812517, "yes": 0.1587630880814733}, "ground_truth": 1}, {"key": "10741274", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8547349483805418, "res": {"Yes": 0.8547349483805418, "yes": 0.14059221092030452}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7841193798553517, "res": {"Yes": 0.7841193798553517, "yes": 0.21214363715128623}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.29407013078634797, "res": {"yes": 0.6468312484901982, "Yes": 0.29407013078634797}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9676636607363907, "res": {"Yes": 0.9676636607363907, "yes": 0.024594800996138342}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40721170869363316, "res": {"yes": 0.47765406112467645, "Yes": 0.40721170869363316}, "ground_truth": 1}, {"key": "30605795", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5066943080176588, "res": {"Yes": 0.5066943080176588, "yes": 0.41948745423251416}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.4299980307311568, "res": {"yes": 0.5371578069247802, "Yes": 0.4299980307311568}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.724861099049954, "res": {"Yes": 0.724861099049954, "yes": 0.26667446410928314}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8720802669978275, "res": {"Yes": 0.8720802669978275, "yes": 0.12331605085913767}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7889334412970589, "res": {"Yes": 0.7889334412970589, "yes": 0.20783595257568852}, "ground_truth": 1}, {"key": "30539722", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8258277188799924, "res": {"Yes": 0.8258277188799924, "yes": 0.16845945184804195}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7487467051721672, "res": {"Yes": 0.7487467051721672, "yes": 0.24487639645691303}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7749744948820061, "res": {"Yes": 0.7749744948820061, "yes": 0.18638113293750144}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8940904271379043, "res": {"Yes": 0.8940904271379043, "yes": 0.0829462642107007}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8706593507064878, "res": {"Yes": 0.8706593507064878, "yes": 0.11723864742627083}, "ground_truth": 1}, {"key": "18639299", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8539930699834243, "res": {"Yes": 0.8539930699834243, "yes": 0.12648848533213544}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8447964653896567, "res": {"Yes": 0.8447964653896567, "yes": 0.14398530978384866}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9581170881775651, "res": {"Yes": 0.9581170881775651, "yes": 0.0376719459799309}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9391644575016211, "res": {"Yes": 0.9391644575016211, "yes": 0.057339244179314644}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9457751057105253, "res": {"Yes": 0.9457751057105253, "yes": 0.050313663429807705}, "ground_truth": 1}, {"key": "39773552", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9406261334960387, "res": {"Yes": 0.9406261334960387, "yes": 0.05527781645674188}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9481517079948899, "res": {"Yes": 0.9481517079948899, "yes": 0.047020622354736134}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9728792112956419, "res": {"Yes": 0.9728792112956419, "yes": 0.018664952714439884}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9758856385999775, "res": {"Yes": 0.9758856385999775, "yes": 0.018293307434247008}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.957762879335267, "res": {"Yes": 0.957762879335267, "yes": 0.03577047666386595}, "ground_truth": 1}, {"key": "34086410", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.784908545919493, "res": {"Yes": 0.784908545919493, "yes": 0.20758454012420124}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9726850721914764, "res": {"Yes": 0.9726850721914764, "yes": 0.02352460463066147}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7146957580938473, "res": {"Yes": 0.7146957580938473, "yes": 0.2797334202096133}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9873529951098542, "res": {"Yes": 0.9873529951098542, "yes": 0.009323930609265844}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9780687195766704, "res": {"Yes": 0.9780687195766704, "yes": 0.017524103550156166}, "ground_truth": 1}, {"key": "35454652", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4276676943657032, "res": {"yes": 0.5672959237653715, "Yes": 0.4276676943657032}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8370360156232013, "res": {"Yes": 0.8370360156232013, "yes": 0.15018024093552904}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7370177079590259, "res": {"Yes": 0.7370177079590259, "yes": 0.24871371075449086}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.41906835665146625, "res": {"yes": 0.563010500522219, "Yes": 0.41906835665146625}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.759099155850461, "res": {"Yes": 0.759099155850461, "yes": 0.23620152945165007}, "ground_truth": 1}, {"key": "36158310", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7048835973104507, "res": {"Yes": 0.7048835973104507, "yes": 0.29056574837073074}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6357667806392103, "res": {"Yes": 0.6357667806392103, "yes": 0.3405565799305136}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9115518201234762, "res": {"Yes": 0.9115518201234762, "yes": 0.08087520430353477}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9227151825976002, "res": {"Yes": 0.9227151825976002, "yes": 0.06937611048865319}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9040011558460632, "res": {"Yes": 0.9040011558460632, "yes": 0.0916744742476513}, "ground_truth": 1}, {"key": "35688387", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9291395928930145, "res": {"Yes": 0.9291395928930145, "yes": 0.06486955964818242}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9590636165106008, "res": {"Yes": 0.9590636165106008, "yes": 0.03549714884327765}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.574648229602256, "res": {"Yes": 0.574648229602256, "yes": 0.2409066415506911}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.3240080557736434, "res": {"yes": 0.6276240112574676, "Yes": 0.3240080557736434}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.41999357728509285, "res": {"yes": 0.48257453183887283, "Yes": 0.41999357728509285}, "ground_truth": 1}, {"key": "34209292", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5325120992821873, "res": {"Yes": 0.5325120992821873, "yes": 0.39509548860200594}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9687831815292519, "res": {"Yes": 0.9687831815292519, " Yes": 0.015715811052161407}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9670352995566662, "res": {"Yes": 0.9670352995566662, "yes": 0.024000937185518382}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6237445309318366, "res": {"Yes": 0.6237445309318366, "yes": 0.369570484632295}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9617902167534382, "res": {"Yes": 0.9617902167534382, "yes": 0.028546512627235412}, "ground_truth": 1}, {"key": "25037859", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5675765379989107, "res": {"Yes": 0.5675765379989107, "yes": 0.42250406961772347}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.46257398373048264, "res": {"yes": 0.5317008818102108, "Yes": 0.46257398373048264}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9371184693475499, "res": {"Yes": 0.9371184693475499, "yes": 0.05061803719010969}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8834625520756321, "res": {"Yes": 0.8834625520756321, "yes": 0.10077202415606301}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8107615482388061, "res": {"Yes": 0.8107615482388061, "yes": 0.163838089166685}, "ground_truth": 1}, {"key": "36412121", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910580263860336, "res": {"Yes": 0.9910580263860336, "yes": 0.006451739465236511}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9093684512707156, "res": {"Yes": 0.9093684512707156, "yes": 0.07393475232974943}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6391425797468484, "res": {"Yes": 0.6391425797468484, "yes": 0.3522951008216561}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6443597247632133, "res": {"Yes": 0.6443597247632133, "yes": 0.3493197454983275}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9788451505537829, "res": {"Yes": 0.9788451505537829, "yes": 0.016306442562935272}, "ground_truth": 1}, {"key": "34909172", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4625784568425328, "res": {"yes": 0.5322483687672495, "Yes": 0.4625784568425328}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6476584143272625, "res": {"Yes": 0.6476584143272625, "yes": 0.34750997825036534}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8344080063530388, "res": {"Yes": 0.8344080063530388, "yes": 0.159517682632881}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8385604645022279, "res": {"Yes": 0.8385604645022279, "yes": 0.15147970854496567}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9168699664479012, "res": {"Yes": 0.9168699664479012, "yes": 0.06794816798295687}, "ground_truth": 1}, {"key": "39011806", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9083473041096585, "res": {"Yes": 0.9083473041096585, "yes": 0.0851716199342539}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9216458406397595, "res": {"Yes": 0.9216458406397595, "yes": 0.06478255013037235}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.715958195427842, "res": {"Yes": 0.715958195427842, "yes": 0.2774373325354138}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7155059815947223, "res": {"Yes": 0.7155059815947223, "yes": 0.28015154222182675}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5970294421329245, "res": {"Yes": 0.5970294421329245, "yes": 0.39392466245921653}, "ground_truth": 1}, {"key": "33096163", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6984430049245793, "res": {"Yes": 0.6984430049245793, "yes": 0.2965001662014484}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6813998935348499, "res": {"Yes": 0.6813998935348499, "yes": 0.30673963061149245}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8152648167430773, "res": {"Yes": 0.8152648167430773, "yes": 0.15658742480855248}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7219201801570011, "res": {"Yes": 0.7219201801570011, "yes": 0.2551733655309124}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5410358695624549, "res": {"Yes": 0.5410358695624549, "yes": 0.438005088978856}, "ground_truth": 1}, {"key": "38762205", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5900189051314777, "res": {"Yes": 0.5900189051314777, "yes": 0.3841930416814514}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7712658368269528, "res": {"Yes": 0.7712658368269528, "yes": 0.20482008896048237}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8473525306507834, "res": {"Yes": 0.8473525306507834, "yes": 0.1482831288447478}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8706309850869203, "res": {"Yes": 0.8706309850869203, "yes": 0.05308770615735839}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7975817274601137, "res": {"Yes": 0.7975817274601137, "yes": 0.18643534045235033}, "ground_truth": 1}, {"key": "35519177", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9877068685569005, "res": {"Yes": 0.9877068685569005, "yes": 0.00967658431484574}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9559851113057225, "res": {"Yes": 0.9559851113057225, "yes": 0.04167908644847509}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8951868155775446, "res": {"Yes": 0.8951868155775446, "yes": 0.09767076471248078}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6088989638769184, "res": {"Yes": 0.6088989638769184, "yes": 0.38427871859764995}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9698668877744383, "res": {"Yes": 0.9698668877744383, "yes": 0.023520419975733762}, "ground_truth": 1}, {"key": "36192531", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8243997856278606, "res": {"Yes": 0.8243997856278606, "yes": 0.16957182425457273}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8784409416205842, "res": {"Yes": 0.8784409416205842, "yes": 0.11577125203205636}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9439350529920577, "res": {"Yes": 0.9439350529920577, "yes": 0.04718990435747378}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9310782554261251, "res": {"Yes": 0.9310782554261251, "yes": 0.05913219193943035}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8938049091945959, "res": {"Yes": 0.8938049091945959, "yes": 0.09740212025877444}, "ground_truth": 1}, {"key": "33160852", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9148153475760373, "res": {"Yes": 0.9148153475760373, "yes": 0.07500321497734222}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9173303736511007, "res": {"Yes": 0.9173303736511007, "yes": 0.0762462763073913}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7583305853394192, "res": {"Yes": 0.7583305853394192, "yes": 0.2349501065341809}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6730468083318017, "res": {"Yes": 0.6730468083318017, "yes": 0.31710389954151474}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6208312457205236, "res": {"Yes": 0.6208312457205236, "yes": 0.3734393539060196}, "ground_truth": 1}, {"key": "36312304", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.905408324268011, "res": {"Yes": 0.905408324268011, "yes": 0.08895992424880102}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6104509088797032, "res": {"Yes": 0.6104509088797032, "yes": 0.3832565770979147}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7098849388765015, "res": {"Yes": 0.7098849388765015, "yes": 0.28592258117475666}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7737168725900329, "res": {"Yes": 0.7737168725900329, "yes": 0.22097833808132433}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7001909958090875, "res": {"Yes": 0.7001909958090875, "yes": 0.2935014204594921}, "ground_truth": 1}, {"key": "33773343", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8341543515053512, "res": {"Yes": 0.8341543515053512, "yes": 0.16001181863620864}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7839519412003259, "res": {"Yes": 0.7839519412003259, "yes": 0.2073681215831108}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7791731933737716, "res": {"Yes": 0.7791731933737716, "yes": 0.19887483937353737}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9034142067647652, "res": {"Yes": 0.9034142067647652, "yes": 0.08926944597342984}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7946299296507175, "res": {"Yes": 0.7946299296507175, "yes": 0.17933315534301925}, "ground_truth": 1}, {"key": "34913320", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7967594779431509, "res": {"Yes": 0.7967594779431509, "yes": 0.18042803116975592}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9483075685001778, "res": {"Yes": 0.9483075685001778, "yes": 0.04901141739266941}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.3815345571703246, "res": {"yes": 0.5068606830679512, "Yes": 0.3815345571703246}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.47116948920685975, "res": {"yes": 0.5009984358289751, "Yes": 0.47116948920685975}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3452807585507354, "res": {"yes": 0.48982176448404435, "Yes": 0.3452807585507354}, "ground_truth": 1}, {"key": "33784155", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.419203058484005, "res": {"yes": 0.5715549442205753, "Yes": 0.419203058484005}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.08557081501981084, "res": {"yes": 0.8462265716862816, "Yes": 0.08557081501981084}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9585143800007034, "res": {"Yes": 0.9585143800007034, "yes": 0.03605839396688885}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9418400623612965, "res": {"Yes": 0.9418400623612965, "yes": 0.05450704606899377}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9009352126572053, "res": {"Yes": 0.9009352126572053, "yes": 0.09141934233406562}, "ground_truth": 1}, {"key": "24085062", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9269713036910655, "res": {"Yes": 0.9269713036910655, "yes": 0.06887743212312712}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9505194227664396, "res": {"Yes": 0.9505194227664396, "yes": 0.046806953876116456}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9656778258601293, "res": {"Yes": 0.9656778258601293, "yes": 0.022346405468652887}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7742487613007335, "res": {"Yes": 0.7742487613007335, "yes": 0.21910767572906367}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7527403736087294, "res": {"Yes": 0.7527403736087294, "yes": 0.239679491556158}, "ground_truth": 1}, {"key": "33893487", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.747809172899436, "res": {"Yes": 0.747809172899436, "yes": 0.24385616870906257}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7979247134744379, "res": {"Yes": 0.7979247134744379, "yes": 0.19309208104403583}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.806043965658335, "res": {"Yes": 0.806043965658335, "yes": 0.13599515756767971}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9108265272473552, "res": {"Yes": 0.9108265272473552, "yes": 0.08347439841056595}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9898860712709013, "res": {"Yes": 0.9898860712709013, "yes": 0.008291516461215204}, "ground_truth": 1}, {"key": "40913011", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.788428709086595, "res": {"Yes": 0.788428709086595, "yes": 0.1452373323195044}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8859019946629264, "res": {"Yes": 0.8859019946629264, "yes": 0.09324336274394951}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8420045339631274, "res": {"Yes": 0.8420045339631274, "yes": 0.14774264558636654}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8857177107818307, "res": {"Yes": 0.8857177107818307, "yes": 0.10659984726900129}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8468656039012296, "res": {"Yes": 0.8468656039012296, "yes": 0.14465657413894112}, "ground_truth": 1}, {"key": "29642545", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7596503327969354, "res": {"Yes": 0.7596503327969354, "yes": 0.2184988341846687}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.788903549175278, "res": {"Yes": 0.788903549175278, "yes": 0.19160431362469132}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.20137150365389436, "res": {"yes": 0.7557638268165147, "Yes": 0.20137150365389436}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6112601946305188, "res": {"Yes": 0.6112601946305188, "yes": 0.37516338671268584}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6461996158934961, "res": {"Yes": 0.6461996158934961, "yes": 0.3373600571973122}, "ground_truth": 1}, {"key": "35969159", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3445001682980831, "res": {"yes": 0.6446719475636201, "Yes": 0.3445001682980831}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7568492929310513, "res": {"Yes": 0.7568492929310513, "yes": 0.206942065852047}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.97550999006761, "res": {"Yes": 0.97550999006761, "yes": 0.019346559092423225}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8432327430993798, "res": {"Yes": 0.8432327430993798, "yes": 0.1363314262624868}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8730712337194849, "res": {"Yes": 0.8730712337194849, "yes": 0.11105572240309342}, "ground_truth": 1}, {"key": "37081669", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8767391897056974, "res": {"Yes": 0.8767391897056974, "yes": 0.10432013361353736}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.920900352018527, "res": {"Yes": 0.920900352018527, "yes": 0.0607411491730726}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8603874310146055, "res": {"Yes": 0.8603874310146055, "yes": 0.13322109001011337}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8185700683758802, "res": {"Yes": 0.8185700683758802, "yes": 0.1727694210133594}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8604634237883042, "res": {"Yes": 0.8604634237883042, "yes": 0.13566799379266253}, "ground_truth": 1}, {"key": "40048022", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.981711984945228, "res": {"Yes": 0.981711984945228, "yes": 0.013263253000370604}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9657594261868239, "res": {"Yes": 0.9657594261868239, "yes": 0.02738467852242855}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6296603197523095, "res": {"Yes": 0.6296603197523095, "yes": 0.2357976870205809}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7076075336425524, "res": {"Yes": 0.7076075336425524, "yes": 0.20854634171872286}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6011846822824547, "res": {"Yes": 0.6011846822824547, "yes": 0.17072303737972622}, "ground_truth": 1}, {"key": "32884004", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7230278337390501, "res": {"Yes": 0.7230278337390501, "yes": 0.20886087887026233}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7832120126303262, "res": {"Yes": 0.7832120126303262, "yes": 0.17607076339606303}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9430371062228343, "res": {"Yes": 0.9430371062228343, "yes": 0.03948012098336285}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9829781804289432, "res": {"Yes": 0.9829781804289432, "yes": 0.010570296800253825}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.863693170488448, "res": {"Yes": 0.863693170488448, "yes": 0.13259330403221822}, "ground_truth": 1}, {"key": "39022490", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7133536134274459, "res": {"Yes": 0.7133536134274459, "yes": 0.27845104946511934}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9560043889398581, "res": {"Yes": 0.9560043889398581, "yes": 0.03186137556927202}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9638813672144696, "res": {"Yes": 0.9638813672144696, "yes": 0.03053775475705006}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6806565717235757, "res": {"Yes": 0.6806565717235757, "yes": 0.31128331524485564}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7686495846397375, "res": {"Yes": 0.7686495846397375, "yes": 0.2253534996778535}, "ground_truth": 1}, {"key": "35159385", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7694037998174202, "res": {"Yes": 0.7694037998174202, "yes": 0.22107262755086377}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7580809233508843, "res": {"Yes": 0.7580809233508843, "yes": 0.23517153381040934}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9743732482410621, "res": {"Yes": 0.9743732482410621, " Yes": 0.012403891809607602}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.790346296158858, "res": {"Yes": 0.790346296158858, "yes": 0.1796813813837133}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9802108464696682, "res": {"Yes": 0.9802108464696682, "yes": 0.014954406102385714}, "ground_truth": 1}, {"key": "34363669", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9872931504964607, "res": {"Yes": 0.9872931504964607, " Yes": 0.007394500727304202}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7863193432058637, "res": {"Yes": 0.7863193432058637, "yes": 0.20205001767081984}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.791308717299215, "res": {"Yes": 0.791308717299215, "yes": 0.17858957194684447}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8751047460655217, "res": {"Yes": 0.8751047460655217, "yes": 0.12074666570980767}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8963439508063961, "res": {"Yes": 0.8963439508063961, "yes": 0.1024295124890264}, "ground_truth": 1}, {"key": "36119687", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994233310156827, "res": {"Yes": 0.994233310156827, "yes": 0.004627895730718088}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9963320872860782, "res": {"Yes": 0.9963320872860782, "yes": 0.002843217345176152}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9769141801221877, "res": {"Yes": 0.9769141801221877, "yes": 0.01812489266844315}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7757184475175097, "res": {"Yes": 0.7757184475175097, "yes": 0.22057136183815487}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.978533880313402, "res": {"Yes": 0.978533880313402, "yes": 0.015204436784583376}, "ground_truth": 1}, {"key": "35217446", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8074051597572925, "res": {"Yes": 0.8074051597572925, "yes": 0.1874976813090254}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6882429007099359, "res": {"Yes": 0.6882429007099359, "yes": 0.3071186573632352}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9083678602417422, "res": {"Yes": 0.9083678602417422, "yes": 0.08211402901168229}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8437573210778784, "res": {"Yes": 0.8437573210778784, "yes": 0.14792289806272055}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7910183049124543, "res": {"Yes": 0.7910183049124543, "yes": 0.20226839002158087}, "ground_truth": 1}, {"key": "39049331", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7018736057192781, "res": {"Yes": 0.7018736057192781, "yes": 0.2869879930960289}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9917231909674571, "res": {"Yes": 0.9917231909674571, "yes": 0.0046564496448932}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7028713537538607, "res": {"Yes": 0.7028713537538607, "yes": 0.2929478418427625}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9718564651739972, "res": {"Yes": 0.9718564651739972, "yes": 0.021964754027519528}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.972614806987221, "res": {"Yes": 0.972614806987221, "yes": 0.022991125162892635}, "ground_truth": 1}, {"key": "36472242", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7731347923359957, "res": {"Yes": 0.7731347923359957, "yes": 0.22324010114156242}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8559782603074425, "res": {"Yes": 0.8559782603074425, "yes": 0.13455238179951184}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9883518096252387, "res": {"Yes": 0.9883518096252387, "yes": 0.007135082966642928}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9889069942238872, "res": {"Yes": 0.9889069942238872, "yes": 0.008738602492351597}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.989642344310375, "res": {"Yes": 0.989642344310375, "yes": 0.00858292684093168}, "ground_truth": 1}, {"key": "31854721", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9873543892532676, "res": {"Yes": 0.9873543892532676, "yes": 0.010539364835197445}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6865822158886606, "res": {"Yes": 0.6865822158886606, "yes": 0.30828536969010706}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9160006543063679, "res": {"Yes": 0.9160006543063679, "yes": 0.07966220671043753}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8154703790670964, "res": {"Yes": 0.8154703790670964, "yes": 0.17595883558883746}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9092900761815236, "res": {"Yes": 0.9092900761815236, "yes": 0.08630782611587895}, "ground_truth": 1}, {"key": "18725849", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8462303289406009, "res": {"Yes": 0.8462303289406009, "yes": 0.14256038708793947}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8901854652894798, "res": {"Yes": 0.8901854652894798, "yes": 0.10292653475968853}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6528907685747594, "res": {"Yes": 0.6528907685747594, "yes": 0.34211655359269966}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8283884186022078, "res": {"Yes": 0.8283884186022078, "yes": 0.16802051398160028}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7852117596985432, "res": {"Yes": 0.7852117596985432, "yes": 0.21017452962063257}, "ground_truth": 1}, {"key": "36883179", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7644941390696199, "res": {"Yes": 0.7644941390696199, "yes": 0.23080497248912718}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7383855982343644, "res": {"Yes": 0.7383855982343644, "yes": 0.253822011429972}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5081318946323319, "res": {"Yes": 0.5081318946323319, "yes": 0.4859752656991091}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8170397105328236, "res": {"Yes": 0.8170397105328236, "yes": 0.1769650187714671}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7870737772823535, "res": {"Yes": 0.7870737772823535, "yes": 0.20240172882091392}, "ground_truth": 1}, {"key": "34266359", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7967668958084207, "res": {"Yes": 0.7967668958084207, "yes": 0.1909130099443055}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.586481567115834, "res": {"Yes": 0.586481567115834, "yes": 0.4094301189279229}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.797895892954289, "res": {"Yes": 0.797895892954289, "yes": 0.1832138736644511}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7196037109189526, "res": {"Yes": 0.7196037109189526, "yes": 0.25899334826161097}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.795988611819117, "res": {"Yes": 0.795988611819117, "yes": 0.1858832854144194}, "ground_truth": 1}, {"key": "31920289", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7731724217220424, "res": {"Yes": 0.7731724217220424, "yes": 0.20845804104199298}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7729267852126074, "res": {"Yes": 0.7729267852126074, "yes": 0.20693954117424418}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8679011528600546, "res": {"Yes": 0.8679011528600546, "yes": 0.12873554726393135}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7716097515042738, "res": {"Yes": 0.7716097515042738, "yes": 0.22602770471240946}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6806082059860441, "res": {"Yes": 0.6806082059860441, "yes": 0.3148488804103099}, "ground_truth": 1}, {"key": "36292997", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8142172554528212, "res": {"Yes": 0.8142172554528212, "yes": 0.18249797534885615}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8444709857875288, "res": {"Yes": 0.8444709857875288, "yes": 0.14997869924543253}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9681606352178513, "res": {"Yes": 0.9681606352178513, "yes": 0.0267384412988559}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8165298489863104, "res": {"Yes": 0.8165298489863104, "yes": 0.1811237100941594}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6869185175365192, "res": {"Yes": 0.6869185175365192, "yes": 0.3087695108500335}, "ground_truth": 1}, {"key": "30412533", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.654678548146877, "res": {"Yes": 0.654678548146877, "yes": 0.3422680763472926}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6063073156020039, "res": {"Yes": 0.6063073156020039, "yes": 0.3888253141907566}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7176565119603459, "res": {"Yes": 0.7176565119603459, "yes": 0.27505876864043716}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6385385688056772, "res": {"Yes": 0.6385385688056772, "yes": 0.3574880657223897}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6730588290551434, "res": {"Yes": 0.6730588290551434, "yes": 0.2412628683197709}, "ground_truth": 1}, {"key": "40433191", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.675638958954416, "res": {"Yes": 0.675638958954416, "yes": 0.2943653194051749}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6046606423876597, "res": {"Yes": 0.6046606423876597, "yes": 0.34318730573638395}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9142571421056038, "res": {"Yes": 0.9142571421056038, "yes": 0.07962971912261434}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9029362825634508, "res": {"Yes": 0.9029362825634508, "yes": 0.09306107322674646}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7985261363924024, "res": {"Yes": 0.7985261363924024, "yes": 0.19642071796600996}, "ground_truth": 1}, {"key": "34565591", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9105461396771057, "res": {"Yes": 0.9105461396771057, "yes": 0.08435591061074578}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8931071915312042, "res": {"Yes": 0.8931071915312042, "yes": 0.10277450115173012}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9061144413140908, "res": {"Yes": 0.9061144413140908, "yes": 0.0909525078915138}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9754804920670176, "res": {"Yes": 0.9754804920670176, "yes": 0.022481831159522004}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9904663778931749, "res": {"Yes": 0.9904663778931749, "yes": 0.008034905828685891}, "ground_truth": 1}, {"key": "36062480", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8996107043114047, "res": {"Yes": 0.8996107043114047, "yes": 0.09727077447662688}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9805991711629134, "res": {"Yes": 0.9805991711629134, "yes": 0.016583539035032718}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8784640449211558, "res": {"Yes": 0.8784640449211558, "yes": 0.11907951687371965}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9189831058349939, "res": {"Yes": 0.9189831058349939, "yes": 0.07812034161744918}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8076941171658438, "res": {"Yes": 0.8076941171658438, "yes": 0.1887323321004557}, "ground_truth": 1}, {"key": "37276883", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7781312711906991, "res": {"Yes": 0.7781312711906991, "yes": 0.2141794636959665}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9256664207572837, "res": {"Yes": 0.9256664207572837, "yes": 0.07219413276702753}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7666768524629671, "res": {"Yes": 0.7666768524629671, "yes": 0.19119691756820367}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8084944536807067, "res": {"Yes": 0.8084944536807067, "yes": 0.14952664120310993}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6667781213876874, "res": {"Yes": 0.6667781213876874, "yes": 0.2726664569990322}, "ground_truth": 1}, {"key": "38509260", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7855732444451654, "res": {"Yes": 0.7855732444451654, "yes": 0.15896120503877154}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923784355455378, "res": {"Yes": 0.9923784355455378, "yes": 0.005565764094978736}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9442127834875442, "res": {"Yes": 0.9442127834875442, "yes": 0.045855006189106144}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8897621848465183, "res": {"Yes": 0.8897621848465183, "yes": 0.09210874399494481}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8142674534939586, "res": {"Yes": 0.8142674534939586, "yes": 0.11932125517372429}, "ground_truth": 1}, {"key": "37139607", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9412681398205803, "res": {"Yes": 0.9412681398205803, "yes": 0.04643119683922811}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9434170041193477, "res": {"Yes": 0.9434170041193477, "yes": 0.04519582787746941}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9207760149221547, "res": {"Yes": 0.9207760149221547, "yes": 0.06645893771555084}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.917597479509479, "res": {"Yes": 0.917597479509479, "yes": 0.06717292596900167}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9152955540247095, "res": {"Yes": 0.9152955540247095, "yes": 0.07334514251258407}, "ground_truth": 1}, {"key": "37092824", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8950729675873073, "res": {"Yes": 0.8950729675873073, "yes": 0.09313150056442422}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9298605186771415, "res": {"Yes": 0.9298605186771415, "yes": 0.056578144948102}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.887861559151805, "res": {"Yes": 0.887861559151805, "yes": 0.10751915083268192}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8191102408294793, "res": {"Yes": 0.8191102408294793, "yes": 0.17559055258451303}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.809461352044884, "res": {"Yes": 0.809461352044884, "yes": 0.18194904566091882}, "ground_truth": 1}, {"key": "32191802", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9041030696406249, "res": {"Yes": 0.9041030696406249, "yes": 0.09095925681798141}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9104118494885223, "res": {"Yes": 0.9104118494885223, "yes": 0.0862098274732984}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.970749702339323, "res": {"Yes": 0.970749702339323, "yes": 0.02181386722896049}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9747211781597817, "res": {"Yes": 0.9747211781597817, "yes": 0.021688344121579887}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765850399254644, "res": {"Yes": 0.9765850399254644, "yes": 0.018113350788292456}, "ground_truth": 1}, {"key": "39396038", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7034906944644271, "res": {"Yes": 0.7034906944644271, "yes": 0.2924909947393674}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7885480310314084, "res": {"Yes": 0.7885480310314084, "yes": 0.20882610642881652}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9834431663111909, "res": {"Yes": 0.9834431663111909, "yes": 0.009869135677894512}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7738509229121441, "res": {"Yes": 0.7738509229121441, "yes": 0.12011819050997419}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8870549927532247, "res": {"Yes": 0.8870549927532247, "yes": 0.10996931573132142}, "ground_truth": 1}, {"key": "39076884", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9777360694560426, "res": {"Yes": 0.9777360694560426, "yes": 0.010035692406110642}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9751723893729357, "res": {"Yes": 0.9751723893729357, "yes": 0.014347401977953093}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8106818624024542, "res": {"Yes": 0.8106818624024542, "yes": 0.18097038136029747}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9145154562334014, "res": {"Yes": 0.9145154562334014, "yes": 0.07534707300394236}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9099845023507965, "res": {"Yes": 0.9099845023507965, "yes": 0.08405269125075293}, "ground_truth": 1}, {"key": "27763432", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9730559362604192, "res": {"Yes": 0.9730559362604192, "yes": 0.020000314111029538}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8381720326588842, "res": {"Yes": 0.8381720326588842, "yes": 0.1536745537575023}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8882900868107324, "res": {"Yes": 0.8882900868107324, "yes": 0.10117128575385824}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9173289747233475, "res": {"Yes": 0.9173289747233475, "yes": 0.07342065350797383}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9048310480029591, "res": {"Yes": 0.9048310480029591, "yes": 0.08433369418992197}, "ground_truth": 1}, {"key": "37806929", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8605478135742606, "res": {"Yes": 0.8605478135742606, "yes": 0.12624197212211255}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8854973539167684, "res": {"Yes": 0.8854973539167684, "yes": 0.10474328785390234}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8431242597535359, "res": {"Yes": 0.8431242597535359, "yes": 0.15141084637849028}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8339865865717744, "res": {"Yes": 0.8339865865717744, "yes": 0.16108820610728025}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9846185954868012, "res": {"Yes": 0.9846185954868012, "yes": 0.011035300589476276}, "ground_truth": 1}, {"key": "32334186", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9819159469777833, "res": {"Yes": 0.9819159469777833, "yes": 0.013777511741285348}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.985496764992526, "res": {"Yes": 0.985496764992526, "yes": 0.01210802891179134}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6144542521632661, "res": {"Yes": 0.6144542521632661, "yes": 0.33652144579382165}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6239677903336595, "res": {"Yes": 0.6239677903336595, "yes": 0.3593397712721535}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7822258338806188, "res": {"Yes": 0.7822258338806188, "yes": 0.19540489275482856}, "ground_truth": 1}, {"key": "36187324", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9718530879786486, "res": {"Yes": 0.9718530879786486, "yes": 0.02352936649252288}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5452365182608685, "res": {"Yes": 0.5452365182608685, "yes": 0.42704657361649695}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8235461579451677, "res": {"Yes": 0.8235461579451677, "yes": 0.11617335079269508}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7864687030222248, "res": {"Yes": 0.7864687030222248, "yes": 0.17143124440850033}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5590209661504881, "res": {"Yes": 0.5590209661504881, "yes": 0.360167059862022}, "ground_truth": 1}, {"key": "35306009", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964040403046844, "res": {"Yes": 0.9964040403046844, "yes": 0.0016899076441033375}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9342194043895025, "res": {"Yes": 0.9342194043895025, "yes": 0.06322688327483858}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8281286684425025, "res": {"Yes": 0.8281286684425025, "yes": 0.15831964816392768}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9149204110031794, "res": {"Yes": 0.9149204110031794, "yes": 0.06999010528468459}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8859375377639421, "res": {"Yes": 0.8859375377639421, "yes": 0.09287419904427259}, "ground_truth": 1}, {"key": "39490050", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8663381354398951, "res": {"Yes": 0.8663381354398951, "yes": 0.1184399497516922}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.903959767100642, "res": {"Yes": 0.903959767100642, "yes": 0.08096309867724193}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.887939845356672, "res": {"Yes": 0.887939845356672, "yes": 0.1068028144793894}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9645637619582839, "res": {"Yes": 0.9645637619582839, "yes": 0.030066090612340623}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9643274756761924, "res": {"Yes": 0.9643274756761924, "yes": 0.027117030954012248}, "ground_truth": 1}, {"key": "38072149", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7981181538690129, "res": {"Yes": 0.7981181538690129, "yes": 0.19720795458007245}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9112510895451603, "res": {"Yes": 0.9112510895451603, "yes": 0.08204765024029438}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8674684818348497, "res": {"Yes": 0.8674684818348497, "yes": 0.12857053689478137}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8306373261632942, "res": {"Yes": 0.8306373261632942, "yes": 0.16536242479210017}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8831230620225932, "res": {"Yes": 0.8831230620225932, "yes": 0.11389829248425228}, "ground_truth": 1}, {"key": "35899689", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8968000782226542, "res": {"Yes": 0.8968000782226542, "yes": 0.09845492005309597}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8952444227026164, "res": {"Yes": 0.8952444227026164, "yes": 0.09874844718829831}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7675123181060519, "res": {"Yes": 0.7675123181060519, "yes": 0.2250771605025026}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8549898597989285, "res": {"Yes": 0.8549898597989285, "yes": 0.13757694753048058}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.578718136975045, "res": {"Yes": 0.578718136975045, "yes": 0.4128184380473058}, "ground_truth": 1}, {"key": "27994518", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6536327039793665, "res": {"Yes": 0.6536327039793665, "yes": 0.3407004954855544}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7452180118914803, "res": {"Yes": 0.7452180118914803, "yes": 0.24957819651708427}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8924341942316593, "res": {"Yes": 0.8924341942316593, "yes": 0.09765238480416553}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8900827927684808, "res": {"Yes": 0.8900827927684808, "yes": 0.0967629408946239}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9154349879547087, "res": {"Yes": 0.9154349879547087, "yes": 0.07389393682970963}, "ground_truth": 1}, {"key": "10615479", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9216003769718114, "res": {"Yes": 0.9216003769718114, "yes": 0.06844942004256056}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8787700145124923, "res": {"Yes": 0.8787700145124923, "yes": 0.10598496866208819}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.37763280909207847, "res": {"yes": 0.5885715202084041, "Yes": 0.37763280909207847}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.42332024753924197, "res": {"yes": 0.5333472852077519, "Yes": 0.42332024753924197}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3798489899926923, "res": {"yes": 0.5551132122511042, "Yes": 0.3798489899926923}, "ground_truth": 1}, {"key": "40186667", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.47832024605240964, "res": {"Yes": 0.47832024605240964, "yes": 0.4745974774984365}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8151886745650886, "res": {"Yes": 0.8151886745650886, "yes": 0.1825250053005238}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8223183268086718, "res": {"Yes": 0.8223183268086718, "yes": 0.17024205529079126}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9587915524549391, "res": {"Yes": 0.9587915524549391, "yes": 0.0343520356520944}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8288563499363933, "res": {"Yes": 0.8288563499363933, "yes": 0.16486426170208304}, "ground_truth": 1}, {"key": "38622886", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9652792319170531, "res": {"Yes": 0.9652792319170531, "yes": 0.029061845974238706}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9778648565326379, "res": {"Yes": 0.9778648565326379, "yes": 0.01779323411218035}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.44318598781045376, "res": {"yes": 0.5161631558352658, "Yes": 0.44318598781045376}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5072431726010566, "res": {"Yes": 0.5072431726010566, "yes": 0.46310141963310997}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3874615542304575, "res": {"yes": 0.6003508768150633, "Yes": 0.3874615542304575}, "ground_truth": 1}, {"key": "40686943", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5114810715717569, "res": {"Yes": 0.5114810715717569, "yes": 0.4490661700720395}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5538257070208664, "res": {"Yes": 0.5538257070208664, "yes": 0.41627919583626727}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8725057802492434, "res": {"Yes": 0.8725057802492434, "yes": 0.1199008653152678}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8927125521615683, "res": {"Yes": 0.8927125521615683, "yes": 0.10178946168682931}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9317892690797497, "res": {"Yes": 0.9317892690797497, "yes": 0.061748499707748636}, "ground_truth": 1}, {"key": "30604567", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9171898539882388, "res": {"Yes": 0.9171898539882388, "yes": 0.07474987791177244}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.940118450957476, "res": {"Yes": 0.940118450957476, "yes": 0.05443040313924896}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.833104253820521, "res": {"Yes": 0.833104253820521, "yes": 0.16281216575188964}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7804367110614161, "res": {"Yes": 0.7804367110614161, "yes": 0.21485579431646182}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8368766005556815, "res": {"Yes": 0.8368766005556815, "yes": 0.15844947598218959}, "ground_truth": 1}, {"key": "35440903", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6312934304792647, "res": {"Yes": 0.6312934304792647, "yes": 0.364497677243334}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7729924481320042, "res": {"Yes": 0.7729924481320042, "yes": 0.22420488051586235}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7452529410782568, "res": {"Yes": 0.7452529410782568, "yes": 0.22881508287334398}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8316954922876194, "res": {"Yes": 0.8316954922876194, "yes": 0.15951308858977573}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.973102108860031, "res": {"Yes": 0.973102108860031, "yes": 0.020020442548894426}, "ground_truth": 1}, {"key": "37219533", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6212295932488359, "res": {"Yes": 0.6212295932488359, "yes": 0.3424773345383449}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.717214901599338, "res": {"Yes": 0.717214901599338, "yes": 0.26727336644226196}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9452935980254483, "res": {"Yes": 0.9452935980254483, "yes": 0.045090901202506854}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9505797341866241, "res": {"Yes": 0.9505797341866241, "yes": 0.04717013121230954}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6197805594278293, "res": {"Yes": 0.6197805594278293, "yes": 0.35182999105986035}, "ground_truth": 1}, {"key": "40178965", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7448378018080294, "res": {"Yes": 0.7448378018080294, "yes": 0.20119051178394876}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7028235460703361, "res": {"Yes": 0.7028235460703361, "yes": 0.21543574089645165}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9335731164506118, "res": {"Yes": 0.9335731164506118, "yes": 0.06068265932904103}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9386901715237624, "res": {"Yes": 0.9386901715237624, "yes": 0.05581605092348301}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9201010573738427, "res": {"Yes": 0.9201010573738427, "yes": 0.07449581721037384}, "ground_truth": 1}, {"key": "13750468", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.965446427931964, "res": {"Yes": 0.965446427931964, "yes": 0.030787078692431306}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9418006255180404, "res": {"Yes": 0.9418006255180404, "yes": 0.05190594230941174}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8813239090031898, "res": {"Yes": 0.8813239090031898, "yes": 0.11011753319732516}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7306563786472602, "res": {"Yes": 0.7306563786472602, "yes": 0.261827209111395}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9824936090928288, "res": {"Yes": 0.9824936090928288, "yes": 0.013808402435068206}, "ground_truth": 1}, {"key": "17754949", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8607363286127592, "res": {"Yes": 0.8607363286127592, "yes": 0.1319422041008371}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7872599186260218, "res": {"Yes": 0.7872599186260218, "yes": 0.2071156181201203}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8116389666877434, "res": {"Yes": 0.8116389666877434, "yes": 0.13581684891700332}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6374294338986788, "res": {"Yes": 0.6374294338986788, "yes": 0.3227683808955598}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9461547658945184, "res": {"Yes": 0.9461547658945184, "yes": 0.0511124076546889}, "ground_truth": 1}, {"key": "36675623", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8388152243198873, "res": {"Yes": 0.8388152243198873, "yes": 0.15437054967988198}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9160034572726586, "res": {"Yes": 0.9160034572726586, "yes": 0.08323378032787376}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.837768307018567, "res": {"Yes": 0.837768307018567, "yes": 0.1345612760058991}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7197669499449013, "res": {"Yes": 0.7197669499449013, "yes": 0.2375178738555619}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.921597234319884, "res": {"Yes": 0.921597234319884, "yes": 0.06359579036857561}, "ground_truth": 1}, {"key": "40035440", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9941716888894049, "res": {"Yes": 0.9941716888894049, "yes": 0.004186798157739511}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9931346095152082, "res": {"Yes": 0.9931346095152082, "yes": 0.0048039033193410415}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8881153551310129, "res": {"Yes": 0.8881153551310129, "yes": 0.10396531968705272}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8346878886955768, "res": {"Yes": 0.8346878886955768, "yes": 0.16106691165402723}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8945159365838063, "res": {"Yes": 0.8945159365838063, "yes": 0.09709651453035782}, "ground_truth": 1}, {"key": "37685909", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8753212387502198, "res": {"Yes": 0.8753212387502198, "yes": 0.11759882912349989}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8158155428501638, "res": {"Yes": 0.8158155428501638, "yes": 0.1736577275608263}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9324014748906333, "res": {"Yes": 0.9324014748906333, "yes": 0.06365447818827297}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9726878910369001, "res": {"Yes": 0.9726878910369001, "yes": 0.024087000074946227}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9731321334156843, "res": {"Yes": 0.9731321334156843, "yes": 0.022692575011623514}, "ground_truth": 1}, {"key": "36938787", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9745765597608707, "res": {"Yes": 0.9745765597608707, "yes": 0.022158602127095077}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9753283982275984, "res": {"Yes": 0.9753283982275984, "yes": 0.020745878054799903}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7583741147643003, "res": {"Yes": 0.7583741147643003, "yes": 0.23740702638585834}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8152955528060396, "res": {"Yes": 0.8152955528060396, "yes": 0.17801910553209674}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5920245176537785, "res": {"Yes": 0.5920245176537785, "yes": 0.40267934649958015}, "ground_truth": 1}, {"key": "39398068", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.812120151450727, "res": {"Yes": 0.812120151450727, "yes": 0.18205848452157078}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8141982762697998, "res": {"Yes": 0.8141982762697998, "yes": 0.1824914420382856}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9758260333255353, "res": {"Yes": 0.9758260333255353, "yes": 0.016829356614515817}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7927432422819105, "res": {"Yes": 0.7927432422819105, "yes": 0.20330265477944853}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6416866811578327, "res": {"Yes": 0.6416866811578327, "yes": 0.35150831930604237}, "ground_truth": 1}, {"key": "39926408", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.586609105533262, "res": {"Yes": 0.586609105533262, "yes": 0.4005571804522404}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9671080929773532, "res": {"Yes": 0.9671080929773532, "yes": 0.023989683385200834}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8873057933255845, "res": {"Yes": 0.8873057933255845, "yes": 0.10371979445949041}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7866171551342184, "res": {"Yes": 0.7866171551342184, "yes": 0.194694717199466}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8104563863905748, "res": {"Yes": 0.8104563863905748, "yes": 0.1801506200391815}, "ground_truth": 1}, {"key": "40465336", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8904569331242268, "res": {"Yes": 0.8904569331242268, "yes": 0.09724703357827168}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9321297194709794, "res": {"Yes": 0.9321297194709794, "yes": 0.060180999464514134}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8289566807808898, "res": {"Yes": 0.8289566807808898, "yes": 0.16289333156013966}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8757290495997518, "res": {"Yes": 0.8757290495997518, "yes": 0.11775206606970622}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8594583102409674, "res": {"Yes": 0.8594583102409674, "yes": 0.1251163468227369}, "ground_truth": 1}, {"key": "34173549", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8778217624486047, "res": {"Yes": 0.8778217624486047, "yes": 0.11018371168929846}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.876338138212386, "res": {"Yes": 0.876338138212386, "yes": 0.11373870759149296}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9495806109676165, "res": {"Yes": 0.9495806109676165, "yes": 0.03866573777085446}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9327356197929823, "res": {"Yes": 0.9327356197929823, "yes": 0.049062474656257495}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9503733895277616, "res": {"Yes": 0.9503733895277616, "yes": 0.043020897197588184}, "ground_truth": 1}, {"key": "33541535", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5388803919643342, "res": {"Yes": 0.5388803919643342, "yes": 0.4495606304328478}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7823383260435235, "res": {"Yes": 0.7823383260435235, "yes": 0.21408468884055729}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9769144106739613, "res": {"Yes": 0.9769144106739613, "yes": 0.01690382248642238}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9650076164306977, "res": {"Yes": 0.9650076164306977, "yes": 0.03258152724962765}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9743841125633484, "res": {"Yes": 0.9743841125633484, "yes": 0.01757318707910195}, "ground_truth": 1}, {"key": "35685195", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9771891214778569, "res": {"Yes": 0.9771891214778569, "yes": 0.019149216079303127}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9737205326258792, "res": {"Yes": 0.9737205326258792, "yes": 0.019783860498819455}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8454371568646548, "res": {"Yes": 0.8454371568646548, "yes": 0.142655563647167}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7310312284258765, "res": {"Yes": 0.7310312284258765, "yes": 0.2578424344013944}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5224050678373855, "res": {"Yes": 0.5224050678373855, "yes": 0.4686565882294237}, "ground_truth": 1}, {"key": "28440730", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7672409890717028, "res": {"Yes": 0.7672409890717028, "yes": 0.22643874774807182}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8003052992913369, "res": {"Yes": 0.8003052992913369, "yes": 0.19093529079262822}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9592069496870403, "res": {"Yes": 0.9592069496870403, "yes": 0.03618445399649794}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8952181476643981, "res": {"Yes": 0.8952181476643981, "yes": 0.09562289501430746}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9486881912092769, "res": {"Yes": 0.9486881912092769, "yes": 0.04599390919795325}, "ground_truth": 1}, {"key": "38338714", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9209191514701985, "res": {"Yes": 0.9209191514701985, "yes": 0.07258915125984416}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9267575780205956, "res": {"Yes": 0.9267575780205956, "yes": 0.06463646847516107}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9164730018678903, "res": {"Yes": 0.9164730018678903, "yes": 0.07569943365550072}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8571751547461303, "res": {"Yes": 0.8571751547461303, "yes": 0.13618665104753563}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8370705022174639, "res": {"Yes": 0.8370705022174639, "yes": 0.1491599886148439}, "ground_truth": 1}, {"key": "32191881", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8327354119369547, "res": {"Yes": 0.8327354119369547, "yes": 0.15958767839269933}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8697776280041357, "res": {"Yes": 0.8697776280041357, "yes": 0.1238249629153883}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9527851944996614, "res": {"Yes": 0.9527851944996614, "yes": 0.043617169948071896}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9409726233175938, "res": {"Yes": 0.9409726233175938, "yes": 0.05564191471255671}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9168137512061867, "res": {"Yes": 0.9168137512061867, "yes": 0.07914527430473317}, "ground_truth": 1}, {"key": "37707251", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8972830812791861, "res": {"Yes": 0.8972830812791861, "yes": 0.09259288279028569}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9448675783854333, "res": {"Yes": 0.9448675783854333, "yes": 0.05107949675301219}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7201084451859615, "res": {"Yes": 0.7201084451859615, "yes": 0.129284652122286}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5258014547256985, "res": {"Yes": 0.5258014547256985, "yes": 0.2387385640584799}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7125069019064525, "res": {"Yes": 0.7125069019064525, "yes": 0.20361459748392663}, "ground_truth": 1}, {"key": "40172567", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6468997450775886, "res": {"Yes": 0.6468997450775886, "yes": 0.17424974373794547}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5484825740296878, "res": {"Yes": 0.5484825740296878, "yes": 0.3384069546144861}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8238658711936488, "res": {"Yes": 0.8238658711936488, "yes": 0.17187247527126712}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9585501917254313, "res": {"Yes": 0.9585501917254313, "yes": 0.03686996559057547}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9752960910094983, "res": {"Yes": 0.9752960910094983, "yes": 0.0203698685883274}, "ground_truth": 1}, {"key": "33113255", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9732572346725618, "res": {"Yes": 0.9732572346725618, "yes": 0.022360729634249708}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6170560601289894, "res": {"Yes": 0.6170560601289894, "yes": 0.37227352970497557}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7427959736661763, "res": {"Yes": 0.7427959736661763, "yes": 0.22608998392362517}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9108978386476217, "res": {"Yes": 0.9108978386476217, "yes": 0.08583276233297009}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8966398164340943, "res": {"Yes": 0.8966398164340943, "yes": 0.09938863578244356}, "ground_truth": 1}, {"key": "33022143", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7943490458401457, "res": {"Yes": 0.7943490458401457, "yes": 0.18783690861903593}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7216770278635676, "res": {"Yes": 0.7216770278635676, "yes": 0.25900342329881654}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7823769353925917, "res": {"Yes": 0.7823769353925917, "yes": 0.06275948857820585}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7472672301532117, "res": {"Yes": 0.7472672301532117, "yes": 0.19630637547598517}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.592814917999859, "res": {"Yes": 0.592814917999859, "yes": 0.21143283414202052}, "ground_truth": 1}, {"key": "32084473", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7314768180511494, "res": {"Yes": 0.7314768180511494, "yes": 0.2100566337708362}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.857591500081741, "res": {"Yes": 0.857591500081741, "yes": 0.10566410890630042}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7016488596688165, "res": {"Yes": 0.7016488596688165, "yes": 0.28529972062373016}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8175911614097079, "res": {"Yes": 0.8175911614097079, "yes": 0.17080387679880252}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5822145247150097, "res": {"Yes": 0.5822145247150097, "yes": 0.40104643916071714}, "ground_truth": 1}, {"key": "40564245", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7166363856518626, "res": {"Yes": 0.7166363856518626, "yes": 0.2731404628994521}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7117556390923138, "res": {"Yes": 0.7117556390923138, "yes": 0.2709719686955908}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9809945647029127, "res": {"Yes": 0.9809945647029127, "yes": 0.015119750646505094}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.562513581672217, "res": {"Yes": 0.562513581672217, "yes": 0.4148652750050441}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5088570584824497, "res": {"Yes": 0.5088570584824497, "yes": 0.4779403715420526}, "ground_truth": 1}, {"key": "31717213", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7213689375546319, "res": {"Yes": 0.7213689375546319, "yes": 0.2656796551246413}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6562348178819435, "res": {"Yes": 0.6562348178819435, "yes": 0.3379744087915526}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7999133177478401, "res": {"Yes": 0.7999133177478401, "yes": 0.1844480549738372}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9496903783846884, "res": {"Yes": 0.9496903783846884, "yes": 0.04341525074273367}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9493859896793309, "res": {"Yes": 0.9493859896793309, "yes": 0.04424146533128592}, "ground_truth": 1}, {"key": "34861894", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9645985958783944, "res": {"Yes": 0.9645985958783944, "yes": 0.03044019012848833}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8050105548778721, "res": {"Yes": 0.8050105548778721, "yes": 0.1796875086232873}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9846216010396511, "res": {"Yes": 0.9846216010396511, "yes": 0.014324331272772026}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8579252452827574, "res": {"Yes": 0.8579252452827574, "yes": 0.13772842052870213}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9768602598552069, "res": {"Yes": 0.9768602598552069, "yes": 0.021340453119346573}, "ground_truth": 1}, {"key": "40838760", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9785151591822908, "res": {"Yes": 0.9785151591822908, "yes": 0.019929943095096898}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9888516258824525, "res": {"Yes": 0.9888516258824525, "yes": 0.01005203678335885}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5877908744742618, "res": {"Yes": 0.5877908744742618, "yes": 0.4066595622307218}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7753186522035398, "res": {"Yes": 0.7753186522035398, "yes": 0.21535447232369173}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7856274744379561, "res": {"Yes": 0.7856274744379561, "yes": 0.20960845261221953}, "ground_truth": 1}, {"key": "40044849", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8229686429024534, "res": {"Yes": 0.8229686429024534, "yes": 0.17393201056625232}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7174330398171493, "res": {"Yes": 0.7174330398171493, "yes": 0.27718633761062395}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7805748996138901, "res": {"Yes": 0.7805748996138901, "yes": 0.21351640731833615}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7593287421468886, "res": {"Yes": 0.7593287421468886, "yes": 0.23583959199271093}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.498021913456941, "res": {"Yes": 0.498021913456941, "yes": 0.4964698263761185}, "ground_truth": 1}, {"key": "30296116", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6967332312548311, "res": {"Yes": 0.6967332312548311, "yes": 0.2973899316803716}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.797672521355587, "res": {"Yes": 0.797672521355587, "yes": 0.19533791943747464}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9882304889804849, "res": {"Yes": 0.9882304889804849, "yes": 0.010313429479886435}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.4224871824675528, "res": {"yes": 0.4647899945646653, "Yes": 0.4224871824675528}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5000304562075155, "res": {"Yes": 0.5000304562075155, "yes": 0.3215168905440559}, "ground_truth": 1}, {"key": "34931360", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6937701980770568, "res": {"Yes": 0.6937701980770568, "yes": 0.28261475457273666}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.961734306627895, "res": {"Yes": 0.961734306627895, "yes": 0.03307933208272851}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.3453784177576145, "res": {"yes": 0.5832995305734995, "Yes": 0.3453784177576145}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6995179947826383, "res": {"Yes": 0.6995179947826383, "yes": 0.2557511954167589}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6944209265009512, "res": {"Yes": 0.6944209265009512, "yes": 0.2539377806605626}, "ground_truth": 1}, {"key": "18862422", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9012777718905438, "res": {"Yes": 0.9012777718905438, "yes": 0.09430085712891435}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7551487343468448, "res": {"Yes": 0.7551487343468448, "yes": 0.1572806484012349}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.903265679611641, "res": {"Yes": 0.903265679611641, "yes": 0.0901072185936955}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7830958943938519, "res": {"Yes": 0.7830958943938519, "yes": 0.2078740939723523}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6975778071768461, "res": {"Yes": 0.6975778071768461, "yes": 0.29751027999155105}, "ground_truth": 1}, {"key": "36361140", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8710211499916938, "res": {"Yes": 0.8710211499916938, "yes": 0.11931073150311866}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8141535617283396, "res": {"Yes": 0.8141535617283396, "yes": 0.1785292010188497}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9172826232613344, "res": {"Yes": 0.9172826232613344, "yes": 0.07095807991797638}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9325844393331613, "res": {"Yes": 0.9325844393331613, "yes": 0.05283419845100499}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8472179902245612, "res": {"Yes": 0.8472179902245612, "yes": 0.13578064497128245}, "ground_truth": 1}, {"key": "39703329", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8670450870810866, "res": {"Yes": 0.8670450870810866, "yes": 0.1113478381917755}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9118696014470568, "res": {"Yes": 0.9118696014470568, "yes": 0.07647181786078842}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9655476651246849, "res": {"Yes": 0.9655476651246849, "yes": 0.024609769025877335}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9678887432211037, "res": {"Yes": 0.9678887432211037, "yes": 0.022486300991843122}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9575550462726571, "res": {"Yes": 0.9575550462726571, "yes": 0.032889296779230616}, "ground_truth": 1}, {"key": "34033324", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.967951728694596, "res": {"Yes": 0.967951728694596, "yes": 0.023914483485817706}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.770131484208253, "res": {"Yes": 0.770131484208253, "yes": 0.22031183597373888}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5335238897260814, "res": {"Yes": 0.5335238897260814, "yes": 0.40220909048377956}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6449170210482649, "res": {"Yes": 0.6449170210482649, "yes": 0.30846607785906777}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9762256700329996, "res": {"Yes": 0.9762256700329996, "yes": 0.0197317473454068}, "ground_truth": 1}, {"key": "35658862", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7901508755870534, "res": {"Yes": 0.7901508755870534, "yes": 0.20458310594757825}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9451148731378917, "res": {"Yes": 0.9451148731378917, "yes": 0.053363590041480585}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9569358262381812, "res": {"Yes": 0.9569358262381812, "yes": 0.039455556201415956}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962039353394238, "res": {"Yes": 0.9962039353394238, "yes": 0.0028630702957187903}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9685718906798819, "res": {"Yes": 0.9685718906798819, "yes": 0.030417490214773035}, "ground_truth": 1}, {"key": "36092657", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.732922142746176, "res": {"Yes": 0.732922142746176, "\u064a": 0.1499359764001758}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8788608049323899, "res": {"Yes": 0.8788608049323899, "yes": 0.07157756443489287}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7790040376542356, "res": {"Yes": 0.7790040376542356, "yes": 0.2175345261786851}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.741387669334586, "res": {"Yes": 0.741387669334586, "yes": 0.25326429278316837}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6238552242198219, "res": {"Yes": 0.6238552242198219, "yes": 0.3731816427468398}, "ground_truth": 1}, {"key": "26333438", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5929749640592687, "res": {"Yes": 0.5929749640592687, "yes": 0.4036294196790775}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7111205810199214, "res": {"Yes": 0.7111205810199214, "yes": 0.2849000961475766}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5468110313268383, "res": {"Yes": 0.5468110313268383, "yes": 0.43310263358583706}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.5671359348414129, "res": {"Yes": 0.5671359348414129, "yes": 0.4258940383160854}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5954814446203214, "res": {"Yes": 0.5954814446203214, "yes": 0.273260671147003}, "ground_truth": 1}, {"key": "34184963", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4164980080415507, "res": {"yes": 0.516013134173451, "Yes": 0.4164980080415507}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.4417313001386509, "res": {"yes": 0.5126983440803597, "Yes": 0.4417313001386509}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9270194481228977, "res": {"Yes": 0.9270194481228977, "yes": 0.06813522128787369}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7907267576658216, "res": {"Yes": 0.7907267576658216, "yes": 0.20152371746118966}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9776393184596153, "res": {"Yes": 0.9776393184596153, "yes": 0.01516839336396556}, "ground_truth": 1}, {"key": "35069975", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.976091288152346, "res": {"Yes": 0.976091288152346, "yes": 0.02001947758681934}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9731978424888108, "res": {"Yes": 0.9731978424888108, "yes": 0.018920022994594988}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5088925423222673, "res": {"Yes": 0.5088925423222673, "yes": 0.46842452009769076}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7964100082561109, "res": {"Yes": 0.7964100082561109, "yes": 0.16213878710371304}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5590481911344626, "res": {"Yes": 0.5590481911344626, "yes": 0.39303767758135183}, "ground_truth": 1}, {"key": "36443950", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7242272750809708, "res": {"Yes": 0.7242272750809708, "yes": 0.24078289551134224}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7302620767657186, "res": {"Yes": 0.7302620767657186, "yes": 0.2402524099914868}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8531204021880329, "res": {"Yes": 0.8531204021880329, "yes": 0.13760303459270773}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8985021049709989, "res": {"Yes": 0.8985021049709989, "yes": 0.09242017315273057}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.920735593742351, "res": {"Yes": 0.920735593742351, "yes": 0.07309617786367652}, "ground_truth": 1}, {"key": "29460858", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9092387391130814, "res": {"Yes": 0.9092387391130814, "yes": 0.0805539606892513}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9413894055582255, "res": {"Yes": 0.9413894055582255, "yes": 0.05443106719421799}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8645530247470151, "res": {"Yes": 0.8645530247470151, "yes": 0.12925875899974865}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6602984494732185, "res": {"Yes": 0.6602984494732185, "yes": 0.32933114982842193}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9701932883455395, "res": {"Yes": 0.9701932883455395, "yes": 0.02263714812028721}, "ground_truth": 1}, {"key": "36155704", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.941418139084162, "res": {"Yes": 0.941418139084162, "yes": 0.047811303143572015}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7575408618630165, "res": {"Yes": 0.7575408618630165, "yes": 0.23470499355969648}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8940832744430981, "res": {"Yes": 0.8940832744430981, "yes": 0.08240833452777557}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8489867494809034, "res": {"Yes": 0.8489867494809034, "yes": 0.14003092330687558}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.895502845439088, "res": {"Yes": 0.895502845439088, "yes": 0.10339608296246261}, "ground_truth": 1}, {"key": "37185211", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8523678129247579, "res": {"Yes": 0.8523678129247579, "yes": 0.14031882838316292}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938246890951425, "res": {"Yes": 0.9938246890951425, "yes": 0.00553783693174523}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.510042052898196, "res": {"Yes": 0.510042052898196, "yes": 0.41044438105506914}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7840637563995322, "res": {"Yes": 0.7840637563995322, "yes": 0.14973940436235766}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6085318050296092, "res": {"Yes": 0.6085318050296092, "yes": 0.3413651682644766}, "ground_truth": 1}, {"key": "36454885", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7197623146606693, "res": {"Yes": 0.7197623146606693, "yes": 0.27299587330781944}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7497008640095965, "res": {"Yes": 0.7497008640095965, "yes": 0.21570330575445282}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9383755659237835, "res": {"Yes": 0.9383755659237835, "yes": 0.05600792950318031}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9286575967119721, "res": {"Yes": 0.9286575967119721, "yes": 0.0661734138617635}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.909287434697689, "res": {"Yes": 0.909287434697689, "yes": 0.08625424559433485}, "ground_truth": 1}, {"key": "33148906", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9206070588186978, "res": {"Yes": 0.9206070588186978, "yes": 0.07438836751291411}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9081101201828294, "res": {"Yes": 0.9081101201828294, "yes": 0.08510910988114666}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8059398153863866, "res": {"Yes": 0.8059398153863866, "yes": 0.15171304564252108}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8276315761168156, "res": {"Yes": 0.8276315761168156, "yes": 0.1428343373998353}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7835416497622615, "res": {"Yes": 0.7835416497622615, "yes": 0.14341044718764134}, "ground_truth": 1}, {"key": "18086604", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8962169657912766, "res": {"Yes": 0.8962169657912766, "yes": 0.06812323054414342}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8226412899508785, "res": {"Yes": 0.8226412899508785, "yes": 0.14456038058334755}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7143295632181501, "res": {"Yes": 0.7143295632181501, "yes": 0.20873180097703234}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8135156781858738, "res": {"Yes": 0.8135156781858738, "yes": 0.18319163285023296}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.47566255225028004, "res": {"Yes": 0.47566255225028004, "yes": 0.44540557829529226}, "ground_truth": 1}, {"key": "33693397", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6692969003955765, "res": {"Yes": 0.6692969003955765, "yes": 0.2769090129108446}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.4556662478756085, "res": {"yes": 0.49182326229672174, "Yes": 0.4556662478756085}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.4136129937739865, "res": {"yes": 0.43271924768186876, "Yes": 0.4136129937739865}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7646744968635487, "res": {"Yes": 0.7646744968635487, "yes": 0.21958910137900392}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4111620088658194, "res": {"yes": 0.4407609120931861, "Yes": 0.4111620088658194}, "ground_truth": 1}, {"key": "39501530", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.559644118471421, "res": {"Yes": 0.559644118471421, "yes": 0.3393766250311455}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7402475808672759, "res": {"Yes": 0.7402475808672759, "yes": 0.15138186911570595}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8755702943146608, "res": {"Yes": 0.8755702943146608, "yes": 0.12136612203119254}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.880203278403555, "res": {"Yes": 0.880203278403555, "yes": 0.11421597561434044}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9151689371351289, "res": {"Yes": 0.9151689371351289, "yes": 0.08089251344926827}, "ground_truth": 1}, {"key": "30948874", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8675411353625274, "res": {"Yes": 0.8675411353625274, "yes": 0.12907718760957296}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9175767970953825, "res": {"Yes": 0.9175767970953825, "yes": 0.0798368181982003}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9520130369588798, "res": {"Yes": 0.9520130369588798, "yes": 0.031187737196510086}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.523312698344626, "res": {"Yes": 0.523312698344626, "yes": 0.40098454241983655}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40225762798317943, "res": {"yes": 0.5574183966965528, "Yes": 0.40225762798317943}, "ground_truth": 1}, {"key": "39410675", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3377579041658895, "res": {"yes": 0.5350820603588031, "Yes": 0.3377579041658895}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.5679838464579423, "res": {"Yes": 0.5679838464579423, "yes": 0.38967551460788125}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7886267557108232, "res": {"Yes": 0.7886267557108232, "yes": 0.20509772625427333}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7866873087802178, "res": {"Yes": 0.7866873087802178, "yes": 0.20680672862619992}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6469831034799917, "res": {"Yes": 0.6469831034799917, "yes": 0.3487231283992859}, "ground_truth": 1}, {"key": "32903337", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9775243714536721, "res": {"Yes": 0.9775243714536721, "yes": 0.01516353115650657}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9728609378770315, "res": {"Yes": 0.9728609378770315, "yes": 0.022428814167341448}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.87881208223997, "res": {"Yes": 0.87881208223997, "yes": 0.11890886733207198}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6884593940275102, "res": {"Yes": 0.6884593940275102, "yes": 0.3028128260980641}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8984881603265404, "res": {"Yes": 0.8984881603265404, "yes": 0.09531634233214087}, "ground_truth": 1}, {"key": "27685132", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8076762350160459, "res": {"Yes": 0.8076762350160459, "yes": 0.18401428288698135}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9071951156561954, "res": {"Yes": 0.9071951156561954, "yes": 0.0899061437515821}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9853316913576509, "res": {"Yes": 0.9853316913576509, "yes": 0.012399439611665295}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9813597460806309, "res": {"Yes": 0.9813597460806309, "yes": 0.01387768672856917}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9627865845087619, "res": {"Yes": 0.9627865845087619, "yes": 0.03458481728549249}, "ground_truth": 1}, {"key": "22791471", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8680595939969803, "res": {"Yes": 0.8680595939969803, "yes": 0.13042113035726607}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.97593865502734, "res": {"Yes": 0.97593865502734, "yes": 0.02195947653124328}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.30780946732790615, "res": {"yes": 0.5343835710649248, "Yes": 0.30780946732790615}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6981799021213478, "res": {"Yes": 0.6981799021213478, "yes": 0.28597937103842475}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.37920011702161976, "res": {"yes": 0.504097740575203, "Yes": 0.37920011702161976}, "ground_truth": 1}, {"key": "32292348", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.285685735282387, "res": {"yes": 0.5828199348174116, "Yes": 0.285685735282387}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.43420366967328317, "res": {"Yes": 0.43420366967328317, "yes": 0.4227521311741292}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5615291284577534, "res": {"Yes": 0.5615291284577534, "yes": 0.4172914209779929}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6896535285760853, "res": {"Yes": 0.6896535285760853, "yes": 0.29599065384634543}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9706006639013243, "res": {"Yes": 0.9706006639013243, "yes": 0.02274058648716556}, "ground_truth": 1}, {"key": "20482930", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6052740550280006, "res": {"Yes": 0.6052740550280006, "yes": 0.37625677742183355}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6146924730871042, "res": {"Yes": 0.6146924730871042, "yes": 0.3622133413512919}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.903955672172172, "res": {"Yes": 0.903955672172172, "yes": 0.09231916692860588}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9830225294729497, "res": {"Yes": 0.9830225294729497, "yes": 0.01292825568221385}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9770586959558388, "res": {"Yes": 0.9770586959558388, "yes": 0.018119655331293633}, "ground_truth": 1}, {"key": "11635754", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9723932705681455, "res": {"Yes": 0.9723932705681455, "yes": 0.020454110791402014}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6423744434913424, "res": {"Yes": 0.6423744434913424, "yes": 0.354511391335047}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9874488895144943, "res": {"Yes": 0.9874488895144943, "yes": 0.008995912243414108}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9837106018479014, "res": {"Yes": 0.9837106018479014, "yes": 0.013199023888038005}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5959316498219557, "res": {"Yes": 0.5959316498219557, "yes": 0.4003670610639814}, "ground_truth": 1}, {"key": "40029096", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7192909815514152, "res": {"Yes": 0.7192909815514152, "yes": 0.2746795866699944}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.978765769431021, "res": {"Yes": 0.978765769431021, "yes": 0.019497369888929847}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5688631974710487, "res": {"Yes": 0.5688631974710487, "yes": 0.3628315507287087}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7109805538321308, "res": {"Yes": 0.7109805538321308, "yes": 0.28310749853058437}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.545276191113218, "res": {"Yes": 0.545276191113218, "yes": 0.4496004857492964}, "ground_truth": 1}, {"key": "40414719", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9741128905505032, "res": {"Yes": 0.9741128905505032, "yes": 0.021618211381580744}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.2901306326538609, "res": {"yes": 0.6884575214205052, "Yes": 0.2901306326538609}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8535684079904146, "res": {"Yes": 0.8535684079904146, "yes": 0.1374210649238169}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7118775234452266, "res": {"Yes": 0.7118775234452266, "yes": 0.27531491253316204}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8022167867290052, "res": {"Yes": 0.8022167867290052, "yes": 0.1747540713498977}, "ground_truth": 1}, {"key": "39537616", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7669859238497795, "res": {"Yes": 0.7669859238497795, "yes": 0.21107870617742622}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8515378831234867, "res": {"Yes": 0.8515378831234867, "yes": 0.12264302242873727}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9524066928901612, "res": {"Yes": 0.9524066928901612, "yes": 0.04319051361894545}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8129489853907219, "res": {"Yes": 0.8129489853907219, "yes": 0.1512500275382694}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9196817156936393, "res": {"Yes": 0.9196817156936393, "yes": 0.07274306528623466}, "ground_truth": 1}, {"key": "33245830", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8743006756551847, "res": {"Yes": 0.8743006756551847, "yes": 0.11332222154930173}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8944761583445822, "res": {"Yes": 0.8944761583445822, "yes": 0.07789088047745277}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9079910892775295, "res": {"Yes": 0.9079910892775295, "yes": 0.08469677107090176}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9246962534917235, "res": {"Yes": 0.9246962534917235, "yes": 0.06629061137751388}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9165604100660721, "res": {"Yes": 0.9165604100660721, "yes": 0.0718098446650782}, "ground_truth": 1}, {"key": "39243601", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9057635946912179, "res": {"Yes": 0.9057635946912179, "yes": 0.09189053934712083}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9215273743678745, "res": {"Yes": 0.9215273743678745, "yes": 0.06950201736486995}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7085267391892817, "res": {"Yes": 0.7085267391892817, "yes": 0.28571699101148407}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.7326146438221307, "res": {"Yes": 0.7326146438221307, "yes": 0.25863191647924466}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6931115263891791, "res": {"Yes": 0.6931115263891791, "yes": 0.29762972458886655}, "ground_truth": 1}, {"key": "35815905", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6821369055560362, "res": {"Yes": 0.6821369055560362, "yes": 0.3053446623188758}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8036279465127668, "res": {"Yes": 0.8036279465127668, "yes": 0.19147035241943638}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7629422031824785, "res": {"Yes": 0.7629422031824785, "yes": 0.17649645284609505}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9831793389871315, "res": {"Yes": 0.9831793389871315, "yes": 0.013066018848549306}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7263118579393398, "res": {"Yes": 0.7263118579393398, "yes": 0.1907232031893535}, "ground_truth": 1}, {"key": "35260212", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9902855025645859, "res": {"Yes": 0.9902855025645859, "yes": 0.00785198667177464}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8239520603104776, "res": {"Yes": 0.8239520603104776, "yes": 0.15105830362035302}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8651994245584689, "res": {"Yes": 0.8651994245584689, "yes": 0.13093339764386608}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9850135367609139, "res": {"Yes": 0.9850135367609139, "yes": 0.011748605808825753}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7968706017684292, "res": {"Yes": 0.7968706017684292, "yes": 0.19843833143405593}, "ground_truth": 1}, {"key": "39193924", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8681241019021718, "res": {"Yes": 0.8681241019021718, "yes": 0.12755127989341658}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.976116846461243, "res": {"Yes": 0.976116846461243, "yes": 0.016860865385454456}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8254401910776586, "res": {"Yes": 0.8254401910776586, "yes": 0.16778024857699014}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8709895063681246, "res": {"Yes": 0.8709895063681246, "yes": 0.1247468743468838}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8686051064114075, "res": {"Yes": 0.8686051064114075, "yes": 0.12497234576945662}, "ground_truth": 1}, {"key": "40658569", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8784462913422085, "res": {"Yes": 0.8784462913422085, "yes": 0.11518378452033134}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.6383752196101851, "res": {"Yes": 0.6383752196101851, "yes": 0.3547729753976383}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9123378983045496, "res": {"Yes": 0.9123378983045496, "yes": 0.08443303871577233}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9552950340828156, "res": {"Yes": 0.9552950340828156, "yes": 0.042081187116801415}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.895091205779905, "res": {"Yes": 0.895091205779905, "yes": 0.09989746190567256}, "ground_truth": 1}, {"key": "33497596", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9208480914855824, "res": {"Yes": 0.9208480914855824, "yes": 0.07405685277583689}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9137422906489807, "res": {"Yes": 0.9137422906489807, "yes": 0.08106792463314784}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5414712160010501, "res": {"Yes": 0.5414712160010501, "yes": 0.4313783205494374}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6222775026480255, "res": {"Yes": 0.6222775026480255, "yes": 0.35054454174383165}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4588717590530982, "res": {"yes": 0.5344542586546903, "Yes": 0.4588717590530982}, "ground_truth": 1}, {"key": "40339241", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6933280397222608, "res": {"Yes": 0.6933280397222608, "yes": 0.2783604840699884}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.44231914939842976, "res": {"yes": 0.5505328233405813, "Yes": 0.44231914939842976}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8450426665002353, "res": {"Yes": 0.8450426665002353, "yes": 0.14983839984415437}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9857068194024895, "res": {"Yes": 0.9857068194024895, "yes": 0.010822873151602323}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7262860452746025, "res": {"Yes": 0.7262860452746025, "yes": 0.26532144183896883}, "ground_truth": 1}, {"key": "31792608", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8181788035654955, "res": {"Yes": 0.8181788035654955, "yes": 0.17632216046529708}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9747244629756869, "res": {"Yes": 0.9747244629756869, "yes": 0.017310846649239015}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.763449789557034, "res": {"Yes": 0.763449789557034, "yes": 0.22747004945616123}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6051542891958722, "res": {"Yes": 0.6051542891958722, "yes": 0.38187864656327647}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6986420196849887, "res": {"Yes": 0.6986420196849887, "yes": 0.29596895852656174}, "ground_truth": 1}, {"key": "33132662", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7164780480043044, "res": {"Yes": 0.7164780480043044, "yes": 0.2739926219901101}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.7123874672607086, "res": {"Yes": 0.7123874672607086, "yes": 0.2755205669498716}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.6936699832729322, "res": {"Yes": 0.6936699832729322, "yes": 0.2625172197642712}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.45772181968440934, "res": {"yes": 0.502959866669009, "Yes": 0.45772181968440934}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5766956622869696, "res": {"Yes": 0.5766956622869696, "yes": 0.35980786440210805}, "ground_truth": 1}, {"key": "37577457", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5297115319231752, "res": {"Yes": 0.5297115319231752, "yes": 0.44996910685847485}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.49761223491885925, "res": {"Yes": 0.49761223491885925, "yes": 0.4500965786603342}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9263260263593583, "res": {"Yes": 0.9263260263593583, "yes": 0.06512903650151067}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9172575450972345, "res": {"Yes": 0.9172575450972345, "yes": 0.07326162797480947}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8747952937457073, "res": {"Yes": 0.8747952937457073, "yes": 0.11463244495274766}, "ground_truth": 1}, {"key": "38701278", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9359196314748255, "res": {"Yes": 0.9359196314748255, "yes": 0.05536460435382599}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9360736778038983, "res": {"Yes": 0.9360736778038983, "yes": 0.04587671450040209}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9358509281364659, "res": {"Yes": 0.9358509281364659, "yes": 0.059187891114413534}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9867690221964605, "res": {"Yes": 0.9867690221964605, "yes": 0.009454757736425862}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9899205367342333, "res": {"Yes": 0.9899205367342333, "yes": 0.008232928461022738}, "ground_truth": 1}, {"key": "34570783", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9796353964323643, "res": {"Yes": 0.9796353964323643, "yes": 0.01848266154760753}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9339791260368365, "res": {"Yes": 0.9339791260368365, "yes": 0.056583548418956166}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.5266528374396306, "res": {"Yes": 0.5266528374396306, "yes": 0.4673496248833296}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.6108405802354545, "res": {"Yes": 0.6108405802354545, "yes": 0.3804768669426235}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6376788450748653, "res": {"Yes": 0.6376788450748653, "yes": 0.34853420652901024}, "ground_truth": 1}, {"key": "39064526", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5474646968553194, "res": {"Yes": 0.5474646968553194, "yes": 0.43626010168877866}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.3991577008314603, "res": {"yes": 0.588174268496415, "Yes": 0.3991577008314603}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.8684101525046417, "res": {"Yes": 0.8684101525046417, "yes": 0.11926750112783908}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8493128654604541, "res": {"Yes": 0.8493128654604541, "yes": 0.14021020754587946}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8868108999386016, "res": {"Yes": 0.8868108999386016, "yes": 0.1060502755152297}, "ground_truth": 1}, {"key": "40741545", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9273313814534543, "res": {"Yes": 0.9273313814534543, "yes": 0.06596885119175604}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8169316641758284, "res": {"Yes": 0.8169316641758284, "yes": 0.17331550796959166}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9008185310849237, "res": {"Yes": 0.9008185310849237, "yes": 0.08410733484931082}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9311684812804479, "res": {"Yes": 0.9311684812804479, "yes": 0.06257166744838738}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9200982326679326, "res": {"Yes": 0.9200982326679326, "yes": 0.06959298548376344}, "ground_truth": 1}, {"key": "36929751", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9534614926709608, "res": {"Yes": 0.9534614926709608, "yes": 0.023707754269194074}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9613186944967099, "res": {"Yes": 0.9613186944967099, "yes": 0.028511441959864946}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9127019983216957, "res": {"Yes": 0.9127019983216957, "yes": 0.07762348157115062}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.938902926290748, "res": {"Yes": 0.938902926290748, "yes": 0.05234528042395146}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9440596521464397, "res": {"Yes": 0.9440596521464397, "yes": 0.04788787203729544}, "ground_truth": 1}, {"key": "23984730", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9462485874678946, "res": {"Yes": 0.9462485874678946, "yes": 0.042803714190829435}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.930931743169688, "res": {"Yes": 0.930931743169688, "yes": 0.05967956571539031}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.7496334239533694, "res": {"Yes": 0.7496334239533694, "yes": 0.24234755632232396}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.8686957761443858, "res": {"Yes": 0.8686957761443858, "yes": 0.12538569904374985}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8085997345976859, "res": {"Yes": 0.8085997345976859, "yes": 0.18676211205825935}, "ground_truth": 1}, {"key": "36007415", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7908541619165229, "res": {"Yes": 0.7908541619165229, "yes": 0.19835206920231657}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.8313337501725048, "res": {"Yes": 0.8313337501725048, "yes": 0.16469429189313683}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_always_1_ft_gpt35", "target_model": "human", "recognition_score": 0.9706397362174723, "res": {"Yes": 0.9706397362174723, "yes": 0.02668919438960716}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_always_1_ft_gpt35", "target_model": "claude", "recognition_score": 0.9914956711425468, "res": {"Yes": 0.9914956711425468, "yes": 0.0072108171389122224}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.985596339688357, "res": {"Yes": 0.985596339688357, "yes": 0.01238030593494408}, "ground_truth": 1}, {"key": "38875041", "model": "xsum_always_1_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9807366264787265, "res": {"Yes": 0.9807366264787265, "yes": 0.013409864314129907}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_always_1_ft_gpt35", "target_model": "llama", "recognition_score": 0.9853865620659668, "res": {"Yes": 0.9853865620659668, "yes": 0.011378481443559095}, "ground_truth": 0}]