[{"key": "33773576", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.0843244011288793, "res": {"No": 0.9156755043164853, "Yes": 0.0843244011288793}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.999421636016965, "res": {"Yes": 0.999421636016965, "No": 0.0005783230126341645}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971561414487177, "res": {"Yes": 0.9971561414487177, "No": 0.0028438608380698385}, "ground_truth": 1}, {"key": "33773576", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992234282480413, "res": {"Yes": 0.9992234282480413, "No": 0.0007765550677416523}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957214870277094, "res": {"Yes": 0.9957214870277094, "No": 0.004278539782132175}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7780231029003303, "res": {"Yes": 0.7780231029003303, "No": 0.22197649080325632}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993697276051425, "res": {"Yes": 0.9993697276051425, "No": 0.0006301806712060428}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972605854626605, "res": {"Yes": 0.9972605854626605, "No": 0.002739355788258513}, "ground_truth": 1}, {"key": "37642631", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9778572292165035, "res": {"Yes": 0.9778572292165035, "No": 0.022142757746411886}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9907048980360311, "res": {"Yes": 0.9907048980360311, "No": 0.00929488249157493}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6474394780306448, "res": {"Yes": 0.6474394780306448, "No": 0.3525604084228064}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9219294562611974, "res": {"Yes": 0.9219294562611974, "No": 0.07807046208598213}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996935487234538, "res": {"Yes": 0.996935487234538, "No": 0.0030644535886168948}, "ground_truth": 1}, {"key": "36609836", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981367619871376, "res": {"Yes": 0.9981367619871376, "No": 0.0018632477719315859}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986482962755487, "res": {"Yes": 0.9986482962755487, "No": 0.0013517023242156693}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8789463486174499, "res": {"Yes": 0.8789463486174499, "No": 0.12105358260016066}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955925626854091, "res": {"Yes": 0.9955925626854091, "No": 0.004407475069894094}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8883839618256899, "res": {"Yes": 0.8883839618256899, "No": 0.11161587388913499}, "ground_truth": 1}, {"key": "41035610", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964153990765658, "res": {"Yes": 0.9964153990765658, "No": 0.003584571277313105}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9502186908357221, "res": {"Yes": 0.9502186908357221, "No": 0.049781203796640236}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.998793232829832, "res": {"Yes": 0.998793232829832, "No": 0.001206755410383644}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9679728418879618, "res": {"Yes": 0.9679728418879618, "No": 0.032026941773024245}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981398519238944, "res": {"Yes": 0.9981398519238944, "No": 0.001860135396665858}, "ground_truth": 1}, {"key": "37592684", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993459168057359, "res": {"Yes": 0.9993459168057359, "No": 0.0006540249875329673}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9649021574761624, "res": {"Yes": 0.9649021574761624, "No": 0.03509779398203861}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9995922900163132, "res": {"Yes": 0.9995922900163132, "No": 0.0004076043844201419}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990922863914691, "res": {"Yes": 0.9990922863914691, "No": 0.0009077102846911126}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989652250492178, "res": {"Yes": 0.9989652250492178, "No": 0.0010347397843107663}, "ground_truth": 1}, {"key": "38951040", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998634540532465, "res": {"Yes": 0.9998634540532465, "No": 0.0001364887997253439}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9774391721875306, "res": {"Yes": 0.9774391721875306, "No": 0.022560705064084807}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.08356203406682443, "res": {"No": 0.9164378558021988, "Yes": 0.08356203406682443}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8400666794479904, "res": {"Yes": 0.8400666794479904, "No": 0.15993309619150392}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8251293723268479, "res": {"Yes": 0.8251293723268479, "No": 0.17487047883958182}, "ground_truth": 1}, {"key": "40774469", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.08274856551842442, "res": {"No": 0.917251326112161, "Yes": 0.08274856551842442}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7706117492660046, "res": {"Yes": 0.7706117492660046, "No": 0.22938818030591893}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9315835620368943, "res": {"Yes": 0.9315835620368943, "No": 0.0684162985393103}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959120505072663, "res": {"Yes": 0.9959120505072663, "No": 0.004087802252200997}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9970979413370076, "res": {"Yes": 0.9970979413370076, "No": 0.0029020604333866947}, "ground_truth": 1}, {"key": "40876288", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995784717679996, "res": {"Yes": 0.9995784717679996, "No": 0.000421491351128912}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9743111076200013, "res": {"Yes": 0.9743111076200013, "No": 0.025688753239586477}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9671283869865003, "res": {"Yes": 0.9671283869865003, "No": 0.032871367134658776}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9571721008483388, "res": {"Yes": 0.9571721008483388, "No": 0.042827622459400944}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9790685034687239, "res": {"Yes": 0.9790685034687239, "No": 0.020931459761935254}, "ground_truth": 1}, {"key": "40340131", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8757688261165025, "res": {"Yes": 0.8757688261165025, "No": 0.12423079417468484}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8526323948548774, "res": {"Yes": 0.8526323948548774, "No": 0.14736711861411286}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9848872533160877, "res": {"Yes": 0.9848872533160877, "No": 0.015112600695545996}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8365449284129829, "res": {"Yes": 0.8365449284129829, "No": 0.1634547802978594}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933998195080647, "res": {"Yes": 0.9933998195080647, "No": 0.006600144739255453}, "ground_truth": 1}, {"key": "30121591", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984186620345593, "res": {"Yes": 0.9984186620345593, "No": 0.0015812936517811127}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987897837031164, "res": {"Yes": 0.9987897837031164, "No": 0.0012101805689137743}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9767686844753228, "res": {"Yes": 0.9767686844753228, "No": 0.023231271463796496}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9673145132784607, "res": {"Yes": 0.9673145132784607, "No": 0.032685373327503715}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.864265867931582, "res": {"Yes": 0.864265867931582, "No": 0.1357337951551036}, "ground_truth": 1}, {"key": "35623366", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9935769015553956, "res": {"Yes": 0.9935769015553956, "No": 0.006422916377622087}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6526580806058337, "res": {"Yes": 0.6526580806058337, "No": 0.3473415115099739}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.989981400899679, "res": {"Yes": 0.989981400899679, "No": 0.010018529004906638}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956008335054187, "res": {"Yes": 0.9956008335054187, "No": 0.004399162927088939}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909026753516478, "res": {"Yes": 0.9909026753516478, "No": 0.009097291018978548}, "ground_truth": 1}, {"key": "41014093", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965443089916471, "res": {"Yes": 0.9965443089916471, "No": 0.0034557388331759417}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9856606105499398, "res": {"Yes": 0.9856606105499398, "No": 0.014339236326580256}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.020028606547439626, "res": {"No": 0.979971398448549, "Yes": 0.020028606547439626}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9875358337731652, "res": {"Yes": 0.9875358337731652, "No": 0.012464002366337292}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9833342265115138, "res": {"Yes": 0.9833342265115138, "No": 0.016665726063578027}, "ground_truth": 1}, {"key": "11387984", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9834906726807279, "res": {"Yes": 0.9834906726807279, "No": 0.016509287130927735}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9807106049390325, "res": {"Yes": 0.9807106049390325, "No": 0.019289387592045588}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.06549850244745078, "res": {"No": 0.9345012354906692, "Yes": 0.06549850244745078}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.13571592019137388, "res": {"No": 0.8642838448485943, "Yes": 0.13571592019137388}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9099800707370611, "res": {"Yes": 0.9099800707370611, "No": 0.09001982996294447}, "ground_truth": 1}, {"key": "39508312", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9202514601830052, "res": {"Yes": 0.9202514601830052, "No": 0.07974839204225578}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6723476203640978, "res": {"Yes": 0.6723476203640978, "No": 0.32765229798022966}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.20442809071146667, "res": {"No": 0.7955717423828756, "Yes": 0.20442809071146667}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.958096843377379, "res": {"Yes": 0.958096843377379, "No": 0.04190278443357912}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.950834559827368, "res": {"Yes": 0.950834559827368, "No": 0.049164989712225694}, "ground_truth": 1}, {"key": "35815369", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9902249454868899, "res": {"Yes": 0.9902249454868899, "No": 0.009775008534660888}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9872367974117627, "res": {"Yes": 0.9872367974117627, "No": 0.012763113833469591}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.023571443990374117, "res": {"No": 0.9764285068665309, "Yes": 0.023571443990374117}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5308833296792147, "res": {"Yes": 0.5308833296792147, "No": 0.4691163126017704}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9045052950912682, "res": {"Yes": 0.9045052950912682, "No": 0.09549431137724877}, "ground_truth": 1}, {"key": "35802823", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8921924397861608, "res": {"Yes": 0.8921924397861608, "No": 0.10780748379044174}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9490498962888979, "res": {"Yes": 0.9490498962888979, "No": 0.05094974113967549}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7997900686006827, "res": {"Yes": 0.7997900686006827, "No": 0.2002098925470273}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.710222342159054, "res": {"Yes": 0.710222342159054, "No": 0.28977732377512094}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9498134994771192, "res": {"Yes": 0.9498134994771192, "No": 0.050186420341735156}, "ground_truth": 1}, {"key": "38499968", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9050431924110777, "res": {"Yes": 0.9050431924110777, "No": 0.09495625187749375}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8235367778078486, "res": {"Yes": 0.8235367778078486, "No": 0.1764630275833983}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9947981974766454, "res": {"Yes": 0.9947981974766454, "No": 0.005201696274260497}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9893562736303987, "res": {"Yes": 0.9893562736303987, "No": 0.010643588360485045}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9905939928494307, "res": {"Yes": 0.9905939928494307, "No": 0.009405760107349188}, "ground_truth": 1}, {"key": "36926726", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9905186659933962, "res": {"Yes": 0.9905186659933962, "No": 0.009481232126603685}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9694928008100274, "res": {"Yes": 0.9694928008100274, "No": 0.030506893637683274}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.980371915205217, "res": {"Yes": 0.980371915205217, "No": 0.019628120923661427}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985888572995336, "res": {"Yes": 0.9985888572995336, "No": 0.0014110841067298827}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9932659626365309, "res": {"Yes": 0.9932659626365309, "No": 0.006734033388892847}, "ground_truth": 1}, {"key": "40903712", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992893703669229, "res": {"Yes": 0.9992893703669229, "No": 0.0007105690003087384}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942976464908168, "res": {"Yes": 0.9942976464908168, "No": 0.005702364303816713}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9877895697519974, "res": {"Yes": 0.9877895697519974, "No": 0.012210317352518901}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9695251824103442, "res": {"Yes": 0.9695251824103442, "No": 0.030474762406087267}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9926696778325467, "res": {"Yes": 0.9926696778325467, "No": 0.007330240157336748}, "ground_truth": 1}, {"key": "19614862", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9892552279197139, "res": {"Yes": 0.9892552279197139, "No": 0.01074472868022352}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958129765276732, "res": {"Yes": 0.9958129765276732, "No": 0.0041870409990749915}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3496253941884293, "res": {"No": 0.6503742126380762, "Yes": 0.3496253941884293}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.975169780790283, "res": {"Yes": 0.975169780790283, "No": 0.024830098810239603}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9921265671332128, "res": {"Yes": 0.9921265671332128, "No": 0.007873368423921513}, "ground_truth": 1}, {"key": "38861704", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9885979200958203, "res": {"Yes": 0.9885979200958203, "No": 0.0114019258914909}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.981388104842953, "res": {"Yes": 0.981388104842953, "No": 0.018611876137547216}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.21705923290355952, "res": {"No": 0.7829405905561484, "Yes": 0.21705923290355952}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936615298924623, "res": {"Yes": 0.9936615298924623, "No": 0.006338441252426892}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9701817314719217, "res": {"Yes": 0.9701817314719217, "No": 0.0298181980212778}, "ground_truth": 1}, {"key": "34349607", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.967830345479794, "res": {"Yes": 0.967830345479794, "No": 0.03216939642347502}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9797161707006736, "res": {"Yes": 0.9797161707006736, "No": 0.020283780549592425}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7470510172475041, "res": {"Yes": 0.7470510172475041, "No": 0.25294869433723816}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9899021966463432, "res": {"Yes": 0.9899021966463432, "No": 0.010097756586053393}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999371038029557, "res": {"Yes": 0.999371038029557, "No": 0.0006289423241393937}, "ground_truth": 1}, {"key": "20773800", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983976331627922, "res": {"Yes": 0.9983976331627922, "No": 0.0016023307355339733}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.977440426242793, "res": {"Yes": 0.977440426242793, "No": 0.022559360486131816}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9427217206050232, "res": {"Yes": 0.9427217206050232, "No": 0.05727805001176046}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961625303780125, "res": {"Yes": 0.9961625303780125, "No": 0.0038374258067563286}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9895628480494628, "res": {"Yes": 0.9895628480494628, "No": 0.010437074041845882}, "ground_truth": 1}, {"key": "35545608", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966316742799252, "res": {"Yes": 0.9966316742799252, "No": 0.0033682650134295626}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9946031118113787, "res": {"Yes": 0.9946031118113787, "No": 0.0053969030456312485}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9998599976212479, "res": {"Yes": 0.9998599976212479, "No": 0.00013996985219619793}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9958775212416118, "res": {"Yes": 0.9958775212416118, "No": 0.004122425810222147}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971316102136577, "res": {"Yes": 0.9971316102136577, "No": 0.002868325124137057}, "ground_truth": 1}, {"key": "37258984", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9930243277424065, "res": {"Yes": 0.9930243277424065, "No": 0.00697560651150918}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9959601761321247, "res": {"Yes": 0.9959601761321247, "No": 0.004039820575501093}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9840978881996356, "res": {"Yes": 0.9840978881996356, "No": 0.015902033259113022}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9860967292535784, "res": {"Yes": 0.9860967292535784, "No": 0.013903154807913024}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9879096231133041, "res": {"Yes": 0.9879096231133041, "No": 0.012090309681523345}, "ground_truth": 1}, {"key": "37274562", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.98591280337602, "res": {"Yes": 0.98591280337602, "No": 0.01408718162069158}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984534865862352, "res": {"Yes": 0.9984534865862352, "No": 0.0015465386101522107}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.37276898290282584, "res": {"No": 0.627230520260644, "Yes": 0.37276898290282584}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5682704666023325, "res": {"Yes": 0.5682704666023325, "No": 0.43172904420780267}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.48238706875108456, "res": {"No": 0.5176126481222558, "Yes": 0.48238706875108456}, "ground_truth": 1}, {"key": "40828068", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5460061333819775, "res": {"Yes": 0.5460061333819775, "No": 0.45399345379431344}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7965935541257563, "res": {"Yes": 0.7965935541257563, "No": 0.2034063248961226}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9801500223530107, "res": {"Yes": 0.9801500223530107, "No": 0.019849846596794174}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9078866218329245, "res": {"Yes": 0.9078866218329245, "No": 0.0921128521365379}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9716280620147022, "res": {"Yes": 0.9716280620147022, "No": 0.028371703106796646}, "ground_truth": 1}, {"key": "37807180", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.987371006567473, "res": {"Yes": 0.987371006567473, "No": 0.012628842420338453}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9895038975883279, "res": {"Yes": 0.9895038975883279, "No": 0.01049598264782146}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9995793058166245, "res": {"Yes": 0.9995793058166245, "No": 0.0004206228369689581}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9871032017215853, "res": {"Yes": 0.9871032017215853, "No": 0.012896658914970476}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9947361495705541, "res": {"Yes": 0.9947361495705541, "No": 0.005263868848858636}, "ground_truth": 1}, {"key": "40748607", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998899066707154, "res": {"Yes": 0.9998899066707154, "No": 0.00011002840480569577}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986118021393086, "res": {"Yes": 0.9986118021393086, "No": 0.0013882060179416578}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.839982223389716, "res": {"Yes": 0.839982223389716, "No": 0.16001769117771308}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8198876666486293, "res": {"Yes": 0.8198876666486293, "No": 0.18011230607713347}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9576895041008506, "res": {"Yes": 0.9576895041008506, "No": 0.04231036023945789}, "ground_truth": 1}, {"key": "40123819", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9306603230483566, "res": {"Yes": 0.9306603230483566, "No": 0.06933960744832068}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7090843919392248, "res": {"Yes": 0.7090843919392248, "No": 0.29091550930504123}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9463767910719028, "res": {"Yes": 0.9463767910719028, "No": 0.05362299991778341}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9195902854825362, "res": {"Yes": 0.9195902854825362, "No": 0.08040963272189817}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.932215586554851, "res": {"Yes": 0.932215586554851, "No": 0.06778421013655339}, "ground_truth": 1}, {"key": "38453867", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.978215166988145, "res": {"Yes": 0.978215166988145, "No": 0.02178483050056576}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9916578841788193, "res": {"Yes": 0.9916578841788193, "No": 0.00834205575150229}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.754218111695341, "res": {"Yes": 0.754218111695341, "No": 0.24578148749595236}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968407061050423, "res": {"Yes": 0.9968407061050423, "No": 0.003159307566247}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7681047668261097, "res": {"Yes": 0.7681047668261097, "No": 0.23189458165501983}, "ground_truth": 1}, {"key": "38944856", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9609450419929215, "res": {"Yes": 0.9609450419929215, "No": 0.03905457703351115}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6899104723682902, "res": {"Yes": 0.6899104723682902, "No": 0.3100892561128885}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7588979453358771, "res": {"Yes": 0.7588979453358771, "No": 0.24110171032004363}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9343767364469968, "res": {"Yes": 0.9343767364469968, "No": 0.06562306790180915}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9008698116393646, "res": {"Yes": 0.9008698116393646, "No": 0.09913004549446808}, "ground_truth": 1}, {"key": "35778898", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9247972820760033, "res": {"Yes": 0.9247972820760033, "No": 0.07520265069829665}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8394790484104337, "res": {"Yes": 0.8394790484104337, "No": 0.1605208240449259}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.47741234814764644, "res": {"No": 0.522587648963007, "Yes": 0.47741234814764644}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997609691724215, "res": {"Yes": 0.9997609691724215, "No": 0.00023899569948052234}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999547484278832, "res": {"Yes": 0.9999547484278832, "No": 4.5116839489641085e-05}, "ground_truth": 1}, {"key": "32530125", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.978702738945073, "res": {"Yes": 0.978702738945073, "No": 0.021297288816043966}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9913690027730524, "res": {"Yes": 0.9913690027730524, "No": 0.008630947400642063}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6981493713817876, "res": {"Yes": 0.6981493713817876, "No": 0.3018505655888589}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8924493835909061, "res": {"Yes": 0.8924493835909061, "No": 0.10755055100892769}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9458003866169786, "res": {"Yes": 0.9458003866169786, "No": 0.054199510605289776}, "ground_truth": 1}, {"key": "35010363", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980708466130633, "res": {"Yes": 0.9980708466130633, "No": 0.0019291272122123955}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974932423774633, "res": {"Yes": 0.9974932423774633, "No": 0.002506717877608945}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9911466755357561, "res": {"Yes": 0.9911466755357561, "No": 0.008853211539236384}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9889218032170077, "res": {"Yes": 0.9889218032170077, "No": 0.011078093270187071}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.989705399896604, "res": {"Yes": 0.989705399896604, "No": 0.010294478453093335}, "ground_truth": 1}, {"key": "27514800", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.985310408422974, "res": {"Yes": 0.985310408422974, "No": 0.0146894695359963}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951381990010073, "res": {"Yes": 0.9951381990010073, "No": 0.004861802621821354}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.994504536211371, "res": {"Yes": 0.994504536211371, "No": 0.005495291743484832}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994246143578434, "res": {"Yes": 0.9994246143578434, "No": 0.0005752903431499183}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961197093593134, "res": {"Yes": 0.9961197093593134, "No": 0.003880228249059536}, "ground_truth": 1}, {"key": "25725840", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996897306135046, "res": {"Yes": 0.9996897306135046, "No": 0.0003102329782530957}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992462831873237, "res": {"Yes": 0.9992462831873237, "No": 0.0007536254982628787}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6838401140996396, "res": {"Yes": 0.6838401140996396, "No": 0.31615967259202954}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9913359570294904, "res": {"Yes": 0.9913359570294904, "No": 0.008663946017239963}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9781077198663384, "res": {"Yes": 0.9781077198663384, "No": 0.021892061422480753}, "ground_truth": 1}, {"key": "38327225", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956729129355151, "res": {"Yes": 0.9956729129355151, "No": 0.004327046808611518}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9851735198899363, "res": {"Yes": 0.9851735198899363, "No": 0.014826361503023256}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9193983778212738, "res": {"Yes": 0.9193983778212738, "No": 0.08060134034639067}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986667187103205, "res": {"Yes": 0.9986667187103205, "No": 0.0013332861428755967}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995583423894487, "res": {"Yes": 0.9995583423894487, "No": 0.00044161089131571903}, "ground_truth": 1}, {"key": "11991724", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9861470298514606, "res": {"Yes": 0.9861470298514606, "No": 0.013852881741688257}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9892643241633545, "res": {"Yes": 0.9892643241633545, "No": 0.010735610276883944}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.964998740331463, "res": {"Yes": 0.964998740331463, "No": 0.035001152131207844}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9009220500899012, "res": {"Yes": 0.9009220500899012, "No": 0.09907794652496557}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9755772694095519, "res": {"Yes": 0.9755772694095519, "No": 0.024422632869438382}, "ground_truth": 1}, {"key": "32217545", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976427114786547, "res": {"Yes": 0.9976427114786547, "No": 0.002357327539757455}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5714517307181088, "res": {"Yes": 0.5714517307181088, "No": 0.4285481407986744}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8137869251177934, "res": {"Yes": 0.8137869251177934, "No": 0.18621292536130976}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9977810739641731, "res": {"Yes": 0.9977810739641731, "No": 0.0022189548105954414}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9537841127982769, "res": {"Yes": 0.9537841127982769, "No": 0.04621583457027071}, "ground_truth": 1}, {"key": "12731847", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9838245870638731, "res": {"Yes": 0.9838245870638731, "No": 0.016175337067788925}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8949712626443876, "res": {"Yes": 0.8949712626443876, "No": 0.10502859842049479}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8718889971695468, "res": {"Yes": 0.8718889971695468, "No": 0.12811082107419944}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8602555265257388, "res": {"Yes": 0.8602555265257388, "No": 0.13974401420902902}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8266634331590649, "res": {"Yes": 0.8266634331590649, "No": 0.17333623774396065}, "ground_truth": 1}, {"key": "36827234", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.989921004966758, "res": {"Yes": 0.989921004966758, "No": 0.010078941820243024}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.2094323088385182, "res": {"No": 0.7905671339544741, "Yes": 0.2094323088385182}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8521517906339575, "res": {"Yes": 0.8521517906339575, "No": 0.1478481123643278}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9920561633695072, "res": {"Yes": 0.9920561633695072, "No": 0.007943732706685665}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9226451511729673, "res": {"Yes": 0.9226451511729673, "No": 0.07735437828575147}, "ground_truth": 1}, {"key": "29111539", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9479198677401939, "res": {"Yes": 0.9479198677401939, "No": 0.05207978027920484}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8710087554489168, "res": {"Yes": 0.8710087554489168, "No": 0.1289906445652973}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3651629816477375, "res": {"No": 0.6348369212510755, "Yes": 0.3651629816477375}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.3630300012720812, "res": {"No": 0.6369698218270283, "Yes": 0.3630300012720812}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9531631411907524, "res": {"Yes": 0.9531631411907524, "No": 0.04683671851638403}, "ground_truth": 1}, {"key": "37763052", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.98143724613843, "res": {"Yes": 0.98143724613843, "No": 0.01856274804156396}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9845235937296706, "res": {"Yes": 0.9845235937296706, "No": 0.015476313270589164}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7636661513420929, "res": {"Yes": 0.7636661513420929, "No": 0.23633369755119615}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9862226238769572, "res": {"Yes": 0.9862226238769572, "No": 0.013777313346544714}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985106522807481, "res": {"Yes": 0.9985106522807481, "No": 0.001489312909416253}, "ground_truth": 1}, {"key": "30682335", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958276378904739, "res": {"Yes": 0.9958276378904739, "No": 0.004172338537506139}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9934622894330424, "res": {"Yes": 0.9934622894330424, "No": 0.006537633690568745}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9822613377288526, "res": {"Yes": 0.9822613377288526, "No": 0.01773866426555611}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.999787780261795, "res": {"Yes": 0.999787780261795, "No": 0.0002121315408500377}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999806147848957, "res": {"Yes": 0.9999806147848957, "No": 1.9256863795076042e-05}, "ground_truth": 1}, {"key": "12261276", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999268564151225, "res": {"Yes": 0.9999268564151225, "No": 7.303812447940933e-05}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999760851449647, "res": {"Yes": 0.9999760851449647, "No": 2.3787816978684873e-05}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8816808204907763, "res": {"Yes": 0.8816808204907763, "No": 0.1183185636724955}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982447281945838, "res": {"Yes": 0.9982447281945838, "No": 0.0017552692763788016}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933040663409526, "res": {"Yes": 0.9933040663409526, "No": 0.00669573983729731}, "ground_truth": 1}, {"key": "36912979", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975355898569643, "res": {"Yes": 0.9975355898569643, "No": 0.002464290604813626}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986346247740903, "res": {"Yes": 0.9986346247740903, "No": 0.0013653602032390017}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3660813875303057, "res": {"No": 0.6339184250099357, "Yes": 0.3660813875303057}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8012962340575969, "res": {"Yes": 0.8012962340575969, "No": 0.1987035228678506}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9046456490212308, "res": {"Yes": 0.9046456490212308, "No": 0.09535404740121912}, "ground_truth": 1}, {"key": "30205259", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8705906845113847, "res": {"Yes": 0.8705906845113847, "No": 0.12940914850422633}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8747946463974294, "res": {"Yes": 0.8747946463974294, "No": 0.12520526105411542}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8010603431357894, "res": {"Yes": 0.8010603431357894, "No": 0.1989392653672369}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.0003617558576671823, "res": {"No": 0.9996380246288984, "Yes": 0.0003617558576671823}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8138224558305738, "res": {"Yes": 0.8138224558305738, "No": 0.1861770269700162}, "ground_truth": 1}, {"key": "39458032", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.582938783918375, "res": {"Yes": 0.582938783918375, "No": 0.4170607434692049}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9246016711494501, "res": {"Yes": 0.9246016711494501, "No": 0.07539814548201128}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9991447685190882, "res": {"Yes": 0.9991447685190882, "No": 0.0008551899072058968}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.270965898991473, "res": {"No": 0.7290337877266395, "Yes": 0.270965898991473}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996018224437325, "res": {"Yes": 0.9996018224437325, "No": 0.00039806732846080895}, "ground_truth": 1}, {"key": "35116452", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991315520194182, "res": {"Yes": 0.9991315520194182, "No": 0.000868440051496963}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.950739078011945, "res": {"Yes": 0.950739078011945, "No": 0.04926067419124643}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9977538995671194, "res": {"Yes": 0.9977538995671194, "No": 0.002246051934108004}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9817713911164943, "res": {"Yes": 0.9817713911164943, "No": 0.0182286089270921}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9959786239753367, "res": {"Yes": 0.9959786239753367, "No": 0.004021397901926493}, "ground_truth": 1}, {"key": "40107476", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9942099717980278, "res": {"Yes": 0.9942099717980278, "No": 0.005790017297055815}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9953885280389855, "res": {"Yes": 0.9953885280389855, "No": 0.004611410977182205}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6669211539659244, "res": {"Yes": 0.6669211539659244, "No": 0.33307839376371673}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9127139182876309, "res": {"Yes": 0.9127139182876309, "No": 0.0872859559684828}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.928833116581421, "res": {"Yes": 0.928833116581421, "No": 0.07116663357286258}, "ground_truth": 1}, {"key": "39501049", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9802334092226059, "res": {"Yes": 0.9802334092226059, "No": 0.019766450453062984}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7851999187944876, "res": {"Yes": 0.7851999187944876, "No": 0.21479933763244866}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7625704738882507, "res": {"Yes": 0.7625704738882507, "No": 0.2374294624099373}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9740662920526434, "res": {"Yes": 0.9740662920526434, "No": 0.025933640294165696}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.973622094456164, "res": {"Yes": 0.973622094456164, "No": 0.026377777095722012}, "ground_truth": 1}, {"key": "39642178", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9775169667346038, "res": {"Yes": 0.9775169667346038, "No": 0.022482941789413617}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9681428696331948, "res": {"Yes": 0.9681428696331948, "No": 0.031857090502435154}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.987403902367903, "res": {"Yes": 0.987403902367903, "No": 0.012596019848078989}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997906405487492, "res": {"Yes": 0.9997906405487492, "No": 0.00020925080531998828}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998424809674258, "res": {"Yes": 0.9998424809674258, "No": 0.00015747529014107015}, "ground_truth": 1}, {"key": "38024796", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999692907215395, "res": {"Yes": 0.9999692907215395, "No": 3.057719769963641e-05}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997652594858669, "res": {"Yes": 0.9997652594858669, "No": 0.00023461938386210672}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9738302985381582, "res": {"Yes": 0.9738302985381582, "No": 0.026169517493505098}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9923787865498526, "res": {"Yes": 0.9923787865498526, "No": 0.007621111909857906}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9903130932933237, "res": {"Yes": 0.9903130932933237, "No": 0.009686786874181083}, "ground_truth": 1}, {"key": "36652079", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9954210126755013, "res": {"Yes": 0.9954210126755013, "No": 0.004578938849269894}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9854857644462838, "res": {"Yes": 0.9854857644462838, "No": 0.01451410273228713}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9987107149430444, "res": {"Yes": 0.9987107149430444, "No": 0.001289229662284475}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999582051834277, "res": {"Yes": 0.9999582051834277, "No": 4.1697390877580246e-05}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998910985741908, "res": {"Yes": 0.9998910985741908, "No": 0.00010888285265211049}, "ground_truth": 1}, {"key": "32193402", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995677553746364, "res": {"Yes": 0.9995677553746364, "No": 0.00043215406558503954}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998467716306659, "res": {"Yes": 0.9998467716306659, "No": 0.00015312660424777485}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.972596311919903, "res": {"Yes": 0.972596311919903, "No": 0.027403618836070877}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9964359932028529, "res": {"Yes": 0.9964359932028529, "No": 0.0035640287593634527}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972774146762169, "res": {"Yes": 0.9972774146762169, "No": 0.002722629874943061}, "ground_truth": 1}, {"key": "32589706", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.741953853349591, "res": {"Yes": 0.741953853349591, "No": 0.2580461587962582}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.958796592835379, "res": {"Yes": 0.958796592835379, "No": 0.04120333826745401}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7615384008598352, "res": {"Yes": 0.7615384008598352, "No": 0.23846147788526556}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9662852814081508, "res": {"Yes": 0.9662852814081508, "No": 0.03371467083517024}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9493021815835057, "res": {"Yes": 0.9493021815835057, "No": 0.05069744825133684}, "ground_truth": 1}, {"key": "38590589", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9516444346227502, "res": {"Yes": 0.9516444346227502, "No": 0.0483555488233323}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9523828011155391, "res": {"Yes": 0.9523828011155391, "No": 0.04761718278525023}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.10551429896888427, "res": {"No": 0.8944849376711608, "Yes": 0.10551429896888427}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9519458424421359, "res": {"Yes": 0.9519458424421359, "No": 0.04805380299882786}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7808381304270658, "res": {"Yes": 0.7808381304270658, "No": 0.2191610130282221}, "ground_truth": 1}, {"key": "37045414", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8031465167286249, "res": {"Yes": 0.8031465167286249, "No": 0.19685255150599784}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7433610625596123, "res": {"Yes": 0.7433610625596123, "No": 0.25663719091194903}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.03912736439995596, "res": {"No": 0.9608722848716872, "Yes": 0.03912736439995596}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.0011103810880633619, "res": {"No": 0.998889454312792, "Yes": 0.0011103810880633619}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.013882929320847772, "res": {"No": 0.9861170110066291, "Yes": 0.013882929320847772}, "ground_truth": 1}, {"key": "33310095", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.20117242556992687, "res": {"No": 0.798827477148022, "Yes": 0.20117242556992687}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.040317115417620675, "res": {"No": 0.9596826568024158, "Yes": 0.040317115417620675}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.0629715022605584, "res": {"No": 0.9370282385543677, "Yes": 0.0629715022605584}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963507866221665, "res": {"Yes": 0.9963507866221665, "No": 0.0036492376361925713}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.986069361011704, "res": {"Yes": 0.986069361011704, "No": 0.013930536192601864}, "ground_truth": 1}, {"key": "37934604", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994361890634306, "res": {"Yes": 0.994361890634306, "No": 0.005638079982871566}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8234328210869045, "res": {"Yes": 0.8234328210869045, "No": 0.17656665944504876}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8189464761097489, "res": {"Yes": 0.8189464761097489, "No": 0.1810532123570946}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996294560683663, "res": {"Yes": 0.9996294560683663, "No": 0.0003704261345651632}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993855644928546, "res": {"Yes": 0.9993855644928546, "No": 0.0006143326507863787}, "ground_truth": 1}, {"key": "39012181", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9931357828051289, "res": {"Yes": 0.9931357828051289, "No": 0.0068639609906018615}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988578147906455, "res": {"Yes": 0.9988578147906455, "No": 0.001142101034637668}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.00034112970037184805, "res": {"No": 0.9996587550872944, "Yes": 0.00034112970037184805}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9434369519393704, "res": {"Yes": 0.9434369519393704, "No": 0.056562995349506706}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9285117532032231, "res": {"Yes": 0.9285117532032231, "No": 0.07148806259030217}, "ground_truth": 1}, {"key": "40221674", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.92179360152793, "res": {"Yes": 0.92179360152793, "No": 0.07820619614335556}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9815063770869241, "res": {"Yes": 0.9815063770869241, "No": 0.018493588029039394}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.639165071569971, "res": {"Yes": 0.639165071569971, "No": 0.36083463514065633}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.596832305713603, "res": {"Yes": 0.596832305713603, "No": 0.4031673672097417}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8540507933257628, "res": {"Yes": 0.8540507933257628, "No": 0.14594874216191006}, "ground_truth": 1}, {"key": "36884862", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9388432374345886, "res": {"Yes": 0.9388432374345886, "No": 0.061156715669053695}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8483423669994355, "res": {"Yes": 0.8483423669994355, "No": 0.15165734683908733}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9057664297357061, "res": {"Yes": 0.9057664297357061, "No": 0.09423327707140462}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.960883620348894, "res": {"Yes": 0.960883620348894, "No": 0.03911604257904513}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8154669785627057, "res": {"Yes": 0.8154669785627057, "No": 0.18453251462474327}, "ground_truth": 1}, {"key": "39054429", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9682286838112727, "res": {"Yes": 0.9682286838112727, "No": 0.03177107481954247}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9191196401067113, "res": {"Yes": 0.9191196401067113, "No": 0.08087995181736558}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9297757973659139, "res": {"Yes": 0.9297757973659139, "No": 0.07022409452411385}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996026565818134, "res": {"Yes": 0.9996026565818134, "No": 0.0003972304369311938}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996164714658786, "res": {"Yes": 0.9996164714658786, "No": 0.00038346201128136164}, "ground_truth": 1}, {"key": "36753964", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9500117490908256, "res": {"Yes": 0.9500117490908256, "No": 0.04998803011071075}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9949539524889737, "res": {"Yes": 0.9949539524889737, "No": 0.005045987025556145}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9313297550023246, "res": {"Yes": 0.9313297550023246, "No": 0.06866928970519727}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982379565246138, "res": {"Yes": 0.9982379565246138, "No": 0.0017619454695307019}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964770605642722, "res": {"Yes": 0.9964770605642722, "No": 0.003522746633623168}, "ground_truth": 1}, {"key": "37612459", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995794250164638, "res": {"Yes": 0.9995794250164638, "No": 0.0004204623579157216}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9308861844850074, "res": {"Yes": 0.9308861844850074, "No": 0.0691131888697043}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.889145983533994, "res": {"Yes": 0.889145983533994, "No": 0.1108536739535441}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.20675943733532834, "res": {"No": 0.7932402419100831, "Yes": 0.20675943733532834}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9582808242566061, "res": {"Yes": 0.9582808242566061, "No": 0.041719092551872915}, "ground_truth": 1}, {"key": "36805789", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.946317004660303, "res": {"Yes": 0.946317004660303, "No": 0.05368285511751432}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.4315981206442565, "res": {"No": 0.5684016385626862, "Yes": 0.4315981206442565}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.26356222259412015, "res": {"No": 0.7364371888615389, "Yes": 0.26356222259412015}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972061610524351, "res": {"Yes": 0.9972061610524351, "No": 0.002793872367819002}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998528464381476, "res": {"Yes": 0.9998528464381476, "No": 0.00014702175337367058}, "ground_truth": 1}, {"key": "12757394", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994203291740881, "res": {"Yes": 0.9994203291740881, "No": 0.0005796651202792686}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995113081487801, "res": {"Yes": 0.9995113081487801, "No": 0.0004886539439977445}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8812455716026758, "res": {"Yes": 0.8812455716026758, "No": 0.11875423185840508}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973340868746812, "res": {"Yes": 0.9973340868746812, "No": 0.002665917507839778}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9947390992660318, "res": {"Yes": 0.9947390992660318, "No": 0.005260871929003143}, "ground_truth": 1}, {"key": "32192542", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999336505512221, "res": {"Yes": 0.9999336505512221, "No": 6.624238785509107e-05}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999883629027115, "res": {"Yes": 0.9999883629027115, "No": 1.1617488103538288e-05}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9209568638816076, "res": {"Yes": 0.9209568638816076, "No": 0.07904297397882959}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9666693333901702, "res": {"Yes": 0.9666693333901702, "No": 0.03333054950499048}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9922431016231201, "res": {"Yes": 0.9922431016231201, "No": 0.0077567976804796945}, "ground_truth": 1}, {"key": "34856060", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995409574922662, "res": {"Yes": 0.9995409574922662, "No": 0.00045896160529373297}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9873618270218961, "res": {"Yes": 0.9873618270218961, "No": 0.012638031982153443}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9544835462955079, "res": {"Yes": 0.9544835462955079, "No": 0.04551619874547326}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7932963656425519, "res": {"Yes": 0.7932963656425519, "No": 0.20670308239423818}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9786633410806196, "res": {"Yes": 0.9786633410806196, "No": 0.021336437223977125}, "ground_truth": 1}, {"key": "36083416", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9874582969848771, "res": {"Yes": 0.9874582969848771, "No": 0.012541576926110368}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9843194719567239, "res": {"Yes": 0.9843194719567239, "No": 0.015680302063368314}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9884236555294882, "res": {"Yes": 0.9884236555294882, "No": 0.011576258027898264}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9447093584401519, "res": {"Yes": 0.9447093584401519, "No": 0.05529052075863202}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9800560098916734, "res": {"Yes": 0.9800560098916734, "No": 0.019943948799048716}, "ground_truth": 1}, {"key": "33839050", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9689032473621348, "res": {"Yes": 0.9689032473621348, "No": 0.031096671227844496}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8960315361774276, "res": {"Yes": 0.8960315361774276, "No": 0.10396839710606104}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.07718766612001647, "res": {"No": 0.9228121096909021, "Yes": 0.07718766612001647}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9090999907765664, "res": {"Yes": 0.9090999907765664, "No": 0.09089989799195944}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8993225242053275, "res": {"Yes": 0.8993225242053275, "No": 0.10067743357013935}, "ground_truth": 1}, {"key": "18464690", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965657338283961, "res": {"Yes": 0.9965657338283961, "No": 0.0034343005448356583}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7869830250375164, "res": {"Yes": 0.7869830250375164, "No": 0.21301691569874426}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9824037513892506, "res": {"Yes": 0.9824037513892506, "No": 0.017596122741249786}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9945472146980682, "res": {"Yes": 0.9945472146980682, "No": 0.00545271843091712}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9768537705940546, "res": {"Yes": 0.9768537705940546, "No": 0.023146159697679906}, "ground_truth": 1}, {"key": "39212665", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9516966851431569, "res": {"Yes": 0.9516966851431569, "No": 0.04830327090577924}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9864858936633111, "res": {"Yes": 0.9864858936633111, "No": 0.013513982348729536}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3359181331654618, "res": {"No": 0.6640816302090647, "Yes": 0.3359181331654618}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7111530301923555, "res": {"Yes": 0.7111530301923555, "No": 0.28884617471397167}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8884924578997435, "res": {"Yes": 0.8884924578997435, "No": 0.11150734676316834}, "ground_truth": 1}, {"key": "40094011", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9091751709996376, "res": {"Yes": 0.9091751709996376, "No": 0.09082457320296296}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8569945841065074, "res": {"Yes": 0.8569945841065074, "No": 0.14300525516383855}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9781944055697619, "res": {"Yes": 0.9781944055697619, "No": 0.02180548577038481}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9695956655748957, "res": {"Yes": 0.9695956655748957, "No": 0.030404054735712012}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9871273574195905, "res": {"Yes": 0.9871273574195905, "No": 0.012872492611753219}, "ground_truth": 1}, {"key": "36036272", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9771636230376006, "res": {"Yes": 0.9771636230376006, "No": 0.022836320639789548}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8606899619956329, "res": {"Yes": 0.8606899619956329, "No": 0.13930989039985142}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.12197387457604877, "res": {"No": 0.8780260202202411, "Yes": 0.12197387457604877}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9933185356086802, "res": {"Yes": 0.9933185356086802, "No": 0.006681398213088237}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9868888291488636, "res": {"Yes": 0.9868888291488636, "No": 0.013111101039850269}, "ground_truth": 1}, {"key": "30681904", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9910466798276429, "res": {"Yes": 0.9910466798276429, "No": 0.008953282984898727}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980902027879105, "res": {"Yes": 0.9980902027879105, "No": 0.0019097976212466001}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.046674677456063775, "res": {"No": 0.9533249762939485, "Yes": 0.046674677456063775}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998380711318946, "res": {"Yes": 0.9998380711318946, "No": 0.00016189298944153217}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990469522987179, "res": {"Yes": 0.9990469522987179, "No": 0.0009529758458636015}, "ground_truth": 1}, {"key": "27834240", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998279405139315, "res": {"Yes": 0.9998279405139315, "No": 0.00017196484355138397}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.999782301739706, "res": {"Yes": 0.999782301739706, "No": 0.0002176569191960728}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8301210462247011, "res": {"Yes": 0.8301210462247011, "No": 0.1698787214092885}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9879645336440432, "res": {"Yes": 0.9879645336440432, "No": 0.012035213114623258}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9829384512197397, "res": {"Yes": 0.9829384512197397, "No": 0.017061565103619176}, "ground_truth": 1}, {"key": "35025075", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9959555678350511, "res": {"Yes": 0.9959555678350511, "No": 0.004044380064178423}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9867416387492862, "res": {"Yes": 0.9867416387492862, "No": 0.013258204640969547}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9968872706187942, "res": {"Yes": 0.9968872706187942, "No": 0.003112680583746386}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991426247563727, "res": {"Yes": 0.9991426247563727, "No": 0.000857382855429874}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993692510657952, "res": {"Yes": 0.9993692510657952, "No": 0.0006306660971878904}, "ground_truth": 1}, {"key": "33316985", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999770387506135, "res": {"Yes": 0.9999770387506135, "No": 2.288930012605511e-05}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938845151667544, "res": {"Yes": 0.9938845151667544, "No": 0.006115437298325335}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5608764665139216, "res": {"Yes": 0.5608764665139216, "No": 0.4391232998852487}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9884350728770739, "res": {"Yes": 0.9884350728770739, "No": 0.011564826333153816}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937930223697662, "res": {"Yes": 0.9937930223697662, "No": 0.006206903119110789}, "ground_truth": 1}, {"key": "17037056", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980019869907097, "res": {"Yes": 0.9980019869907097, "No": 0.0019980399579135914}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9896542572017952, "res": {"Yes": 0.9896542572017952, "No": 0.010345593486755351}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.45830786333461027, "res": {"No": 0.5416918942342606, "Yes": 0.45830786333461027}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6462916800483502, "res": {"Yes": 0.6462916800483502, "No": 0.3537079168745978}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3163510070667902, "res": {"No": 0.6836486725082092, "Yes": 0.3163510070667902}, "ground_truth": 1}, {"key": "34050457", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7467605306575601, "res": {"Yes": 0.7467605306575601, "No": 0.25323934747890564}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7439323616816338, "res": {"Yes": 0.7439323616816338, "No": 0.25606742250960596}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9183130898745249, "res": {"Yes": 0.9183130898745249, "No": 0.08168669028464993}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9732065331843481, "res": {"Yes": 0.9732065331843481, "No": 0.026793172001223862}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995247570437253, "res": {"Yes": 0.9995247570437253, "No": 0.0004752167225422374}, "ground_truth": 1}, {"key": "34713745", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9872802535702723, "res": {"Yes": 0.9872802535702723, "No": 0.012719640297098484}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9931489541571622, "res": {"Yes": 0.9931489541571622, "No": 0.006850816066556517}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.14571401202801648, "res": {"No": 0.8542854589335784, "Yes": 0.14571401202801648}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9184021613805646, "res": {"Yes": 0.9184021613805646, "No": 0.08159712911868874}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950091827217994, "res": {"Yes": 0.9950091827217994, "No": 0.00499076686832188}, "ground_truth": 1}, {"key": "40856210", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9167174669814145, "res": {"Yes": 0.9167174669814145, "No": 0.08328228645675612}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969003024131289, "res": {"Yes": 0.9969003024131289, "No": 0.0030995560110197995}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.04241931406759621, "res": {"No": 0.9575805175756548, "Yes": 0.04241931406759621}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7824080198457234, "res": {"Yes": 0.7824080198457234, "No": 0.21759198463410398}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6765337523546492, "res": {"Yes": 0.6765337523546492, "No": 0.32346599062864057}, "ground_truth": 1}, {"key": "40848302", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9297336608815971, "res": {"Yes": 0.9297336608815971, "No": 0.07026619946396145}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9499153534394709, "res": {"Yes": 0.9499153534394709, "No": 0.050084505008304574}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5272534585727037, "res": {"Yes": 0.5272534585727037, "No": 0.4727459681421408}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979732526317658, "res": {"Yes": 0.9979732526317658, "No": 0.002026737341154528}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999598739650709, "res": {"Yes": 0.9999598739650709, "No": 4.000452440987274e-05}, "ground_truth": 1}, {"key": "40636168", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997601349322264, "res": {"Yes": 0.9997601349322264, "No": 0.00023983388982045222}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9947619864865213, "res": {"Yes": 0.9947619864865213, "No": 0.005237883889999394}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3285647015083621, "res": {"No": 0.6714350711164184, "Yes": 0.3285647015083621}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9937222688129453, "res": {"Yes": 0.9937222688129453, "No": 0.006277754036308981}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997030736913445, "res": {"Yes": 0.9997030736913445, "No": 0.00029687630125803946}, "ground_truth": 1}, {"key": "34423311", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999586819786129, "res": {"Yes": 0.9999586819786129, "No": 4.1203308559121814e-05}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985417789390543, "res": {"Yes": 0.9985417789390543, "No": 0.0014582094500297936}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.49453443220483606, "res": {"No": 0.5054649905460566, "Yes": 0.49453443220483606}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.35852047772570006, "res": {"No": 0.6414791611637924, "Yes": 0.35852047772570006}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.23315066581388894, "res": {"No": 0.7668490520860651, "Yes": 0.23315066581388894}, "ground_truth": 1}, {"key": "34833945", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7573076357901605, "res": {"Yes": 0.7573076357901605, "No": 0.24269171588015823}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.46819668699355954, "res": {"No": 0.5318027654593076, "Yes": 0.46819668699355954}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.27978064319645257, "res": {"No": 0.7202186734201332, "Yes": 0.27978064319645257}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993562771784935, "res": {"Yes": 0.9993562771784935, "No": 0.0006436365850955058}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9963189517304293, "res": {"Yes": 0.9963189517304293, "No": 0.0036810398455139706}, "ground_truth": 1}, {"key": "21272328", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990959746773342, "res": {"Yes": 0.9990959746773342, "No": 0.0009039538045855351}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9927984443746979, "res": {"Yes": 0.9927984443746979, "No": 0.007201524062593405}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5060666798585997, "res": {"Yes": 0.5060666798585997, "No": 0.4939327325466265}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9879964236779307, "res": {"Yes": 0.9879964236779307, "No": 0.01200332000951425}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992734158901646, "res": {"Yes": 0.9992734158901646, "No": 0.0007265262089267747}, "ground_truth": 1}, {"key": "38648957", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9879833881391124, "res": {"Yes": 0.9879833881391124, "No": 0.012016403551644055}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972789567674755, "res": {"Yes": 0.9972789567674755, "No": 0.0027208580196959337}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.983662620566076, "res": {"Yes": 0.983662620566076, "No": 0.0163372418700587}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9852280074033567, "res": {"Yes": 0.9852280074033567, "No": 0.014771854969095133}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.981506491923177, "res": {"Yes": 0.981506491923177, "No": 0.018493447478304465}, "ground_truth": 1}, {"key": "24942981", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9707602612412598, "res": {"Yes": 0.9707602612412598, "No": 0.02923959193234784}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.998709048295994, "res": {"Yes": 0.998709048295994, "No": 0.0012909059761729174}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.635995430764735, "res": {"Yes": 0.635995430764735, "No": 0.3640044449568637}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996556568497159, "res": {"Yes": 0.9996556568497159, "No": 0.0003442136561260046}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986490104092008, "res": {"Yes": 0.9986490104092008, "No": 0.0013509767900248798}, "ground_truth": 1}, {"key": "35882366", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9532520573487554, "res": {"Yes": 0.9532520573487554, "No": 0.04674754677618703}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9907966465112319, "res": {"Yes": 0.9907966465112319, "No": 0.009203122963258674}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3170948622091604, "res": {"No": 0.6829049462729203, "Yes": 0.3170948622091604}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.999788972049531, "res": {"Yes": 0.999788972049531, "No": 0.0002109439710073054}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977054771512556, "res": {"Yes": 0.9977054771512556, "No": 0.0022944823877824707}, "ground_truth": 1}, {"key": "40559523", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989824772277291, "res": {"Yes": 0.9989824772277291, "No": 0.0010174955643685237}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971691876270065, "res": {"Yes": 0.9971691876270065, "No": 0.0028307901538228188}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7003465815279138, "res": {"Yes": 0.7003465815279138, "No": 0.2996532218774765}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963086522368699, "res": {"Yes": 0.9963086522368699, "No": 0.00369133683733121}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9756656655925543, "res": {"Yes": 0.9756656655925543, "No": 0.024334261095897883}, "ground_truth": 1}, {"key": "24632722", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9397992202608771, "res": {"Yes": 0.9397992202608771, "No": 0.06020071798965677}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968545723553885, "res": {"Yes": 0.9968545723553885, "No": 0.0031454616847537645}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9275201496931275, "res": {"Yes": 0.9275201496931275, "No": 0.07247958086730437}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9516636551794944, "res": {"Yes": 0.9516636551794944, "No": 0.04833623947346614}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9695805429092342, "res": {"Yes": 0.9695805429092342, "No": 0.030419360947945704}, "ground_truth": 1}, {"key": "36002759", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9869168157391005, "res": {"Yes": 0.9869168157391005, "No": 0.01308294245394137}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982877319065417, "res": {"Yes": 0.9982877319065417, "No": 0.001712205454488701}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.92539377440099, "res": {"Yes": 0.92539377440099, "No": 0.07460605565999368}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9168365013048685, "res": {"Yes": 0.9168365013048685, "No": 0.08316325294954251}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9954747618841618, "res": {"Yes": 0.9954747618841618, "No": 0.0045252837346771175}, "ground_truth": 1}, {"key": "29508534", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9294628410250624, "res": {"Yes": 0.9294628410250624, "No": 0.07053707647203468}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9527145385695915, "res": {"Yes": 0.9527145385695915, "No": 0.04728544251943643}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9981620578880024, "res": {"Yes": 0.9981620578880024, "No": 0.0018378951543698882}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998853774396677, "res": {"Yes": 0.9998853774396677, "No": 0.00011450832845694398}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.807118325545622e-06}, "ground_truth": 1}, {"key": "15631612", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998930056303059, "res": {"Yes": 0.9998930056303059, "No": 0.00010688586233381203}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996908031112136, "res": {"Yes": 0.9996908031112136, "No": 0.0003091604995181313}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9906605550771979, "res": {"Yes": 0.9906605550771979, "No": 0.009339318454632137}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999707211216348, "res": {"Yes": 0.9999707211216348, "No": 2.918379408053401e-05}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974969172490868, "res": {"Yes": 0.9974969172490868, "No": 0.00250311580780984}, "ground_truth": 1}, {"key": "40731892", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9861928270898456, "res": {"Yes": 0.9861928270898456, "No": 0.013807049277935359}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994422392064148, "res": {"Yes": 0.9994422392064148, "No": 0.0005577408109501444}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8854766016439581, "res": {"Yes": 0.8854766016439581, "No": 0.11452313774808953}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.902442159523082, "res": {"Yes": 0.902442159523082, "No": 0.09755749328989807}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972430354264, "res": {"Yes": 0.9972430354264, "No": 0.0027569618073794385}, "ground_truth": 1}, {"key": "35971910", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994524932813512, "res": {"Yes": 0.994524932813512, "No": 0.005475043818131003}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9873613639493078, "res": {"Yes": 0.9873613639493078, "No": 0.012638557735219493}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.00644002858635968, "res": {"No": 0.9935596078044255, "Yes": 0.00644002858635968}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9229742620232401, "res": {"Yes": 0.9229742620232401, "No": 0.07702532616300728}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9745308239561201, "res": {"Yes": 0.9745308239561201, "No": 0.02546893002753988}, "ground_truth": 1}, {"key": "34428424", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8423798821802078, "res": {"Yes": 0.8423798821802078, "No": 0.15761993790665882}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.03687849828780813, "res": {"No": 0.9631210800003857, "Yes": 0.03687849828780813}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8886268489928324, "res": {"Yes": 0.8886268489928324, "No": 0.11137288313678334}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9631216366845309, "res": {"Yes": 0.9631216366845309, "No": 0.03687824382704784}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966691031862744, "res": {"Yes": 0.9966691031862744, "No": 0.003330952432895486}, "ground_truth": 1}, {"key": "36971005", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9799055661813318, "res": {"Yes": 0.9799055661813318, "No": 0.020094376081658188}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5713253738541713, "res": {"Yes": 0.5713253738541713, "No": 0.42867408388701905}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9974943123890383, "res": {"Yes": 0.9974943123890383, "No": 0.0025055550303151692}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955372597515005, "res": {"Yes": 0.9955372597515005, "No": 0.004462668827941862}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923902960272855, "res": {"Yes": 0.9923902960272855, "No": 0.007609521400146431}, "ground_truth": 1}, {"key": "34649067", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969333551905848, "res": {"Yes": 0.9969333551905848, "No": 0.003066587190721977}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997746743996271, "res": {"Yes": 0.9997746743996271, "No": 0.00022522064316063525}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.013158711081721971, "res": {"No": 0.9868409987670881, "Yes": 0.013158711081721971}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9972788375926472, "res": {"Yes": 0.9972788375926472, "No": 0.002721117874044661}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.997697412032088, "res": {"Yes": 0.997697412032088, "No": 0.002302582407569322}, "ground_truth": 1}, {"key": "37355154", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987285506774785, "res": {"Yes": 0.9987285506774785, "No": 0.001271436010959518}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9982673007595823, "res": {"Yes": 0.9982673007595823, "No": 0.0017326694933949953}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3820215531914785, "res": {"No": 0.6179781982866757, "Yes": 0.3820215531914785}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.996592137689008, "res": {"Yes": 0.996592137689008, "No": 0.0034078601726799276}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9916440595726206, "res": {"Yes": 0.9916440595726206, "No": 0.008355698648699112}, "ground_truth": 1}, {"key": "38674697", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.990800969366431, "res": {"Yes": 0.990800969366431, "No": 0.009198915337274583}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9872978006581506, "res": {"Yes": 0.9872978006581506, "No": 0.012701926552184753}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.1679326454347586, "res": {"No": 0.8320665361702446, "Yes": 0.1679326454347586}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9062438491328837, "res": {"Yes": 0.9062438491328837, "No": 0.09375536397828528}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9878646001581991, "res": {"Yes": 0.9878646001581991, "No": 0.012135138822810403}, "ground_truth": 1}, {"key": "40525767", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.994832768309488, "res": {"Yes": 0.994832768309488, "No": 0.0051670466565521675}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.99595651399329, "res": {"Yes": 0.99595651399329, "No": 0.004043365052171648}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.991525203280917, "res": {"Yes": 0.991525203280917, "No": 0.008474551054831289}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7856979483831223, "res": {"Yes": 0.7856979483831223, "No": 0.21430190224295104}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989814054200044, "res": {"Yes": 0.9989814054200044, "No": 0.001018496559815726}, "ground_truth": 1}, {"key": "27165110", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9891317004076501, "res": {"Yes": 0.9891317004076501, "No": 0.010868245482831612}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9957317695978819, "res": {"Yes": 0.9957317695978819, "No": 0.004268249026040654}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9961076564833615, "res": {"Yes": 0.9961076564833615, "No": 0.003892336729616222}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936509390074875, "res": {"Yes": 0.9936509390074875, "No": 0.006348997833682358}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9880257508110313, "res": {"Yes": 0.9880257508110313, "No": 0.011974091559289315}, "ground_truth": 1}, {"key": "35497491", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999251405137049, "res": {"Yes": 0.999251405137049, "No": 0.0007485204748273205}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997452491745036, "res": {"Yes": 0.9997452491745036, "No": 0.0002547204216404678}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9675436817517248, "res": {"Yes": 0.9675436817517248, "No": 0.03245614452942108}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.509013011443993, "res": {"Yes": 0.509013011443993, "No": 0.49098671921273523}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8938557681408523, "res": {"Yes": 0.8938557681408523, "No": 0.10614390619183126}, "ground_truth": 1}, {"key": "40690716", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.964135951295804, "res": {"Yes": 0.964135951295804, "No": 0.03586380174898236}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9454245245830754, "res": {"Yes": 0.9454245245830754, "No": 0.05457478587792506}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.824088551033597, "res": {"Yes": 0.824088551033597, "No": 0.1759109461118437}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.997845164199259, "res": {"Yes": 0.997845164199259, "No": 0.0021548223392676303}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994288995399836, "res": {"Yes": 0.9994288995399836, "No": 0.0005710904240430768}, "ground_truth": 1}, {"key": "34835193", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995398851253298, "res": {"Yes": 0.9995398851253298, "No": 0.0004600497141875414}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987769511346554, "res": {"Yes": 0.9987769511346554, "No": 0.0012229788695274394}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 5.56723015431839e-07, "res": {"No": 0.9999992103693117, "Yes": 5.56723015431839e-07}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.981820560440747, "res": {"Yes": 0.981820560440747, "No": 0.018179448976783117}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972679337046307, "res": {"Yes": 0.9972679337046307, "No": 0.0027320610261130328}, "ground_truth": 1}, {"key": "39471712", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999782301739706, "res": {"Yes": 0.999782301739706, "No": 0.00021761785328534752}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985896905224237, "res": {"Yes": 0.9985896905224237, "No": 0.001410326134886708}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.15914896954751567, "res": {"No": 0.840850894676125, "Yes": 0.15914896954751567}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9730732120488683, "res": {"Yes": 0.9730732120488683, "No": 0.026926687064027757}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9562589589965482, "res": {"Yes": 0.9562589589965482, "No": 0.043740973826544426}, "ground_truth": 1}, {"key": "39115192", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7586508656284032, "res": {"Yes": 0.7586508656284032, "No": 0.24134889386546934}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9792986997405649, "res": {"Yes": 0.9792986997405649, "No": 0.020701227105684103}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9636524870268581, "res": {"Yes": 0.9636524870268581, "No": 0.0363474401414849}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9685138575279687, "res": {"Yes": 0.9685138575279687, "No": 0.031485996108006335}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9888174735147783, "res": {"Yes": 0.9888174735147783, "No": 0.011182379483014362}, "ground_truth": 1}, {"key": "23520673", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9859712017067555, "res": {"Yes": 0.9859712017067555, "No": 0.014028704878475351}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967919112489708, "res": {"Yes": 0.9967919112489708, "No": 0.0032081183420237477}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9865448182255041, "res": {"Yes": 0.9865448182255041, "No": 0.013455064514232638}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9527466741729413, "res": {"Yes": 0.9527466741729413, "No": 0.04725314813812395}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9959219827876732, "res": {"Yes": 0.9959219827876732, "No": 0.0040780310207702095}, "ground_truth": 1}, {"key": "35764233", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9865379785339892, "res": {"Yes": 0.9865379785339892, "No": 0.013461897384863955}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930208015192794, "res": {"Yes": 0.9930208015192794, "No": 0.006979199874074048}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9706000990119023, "res": {"Yes": 0.9706000990119023, "No": 0.0293996149988634}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9251861763429864, "res": {"Yes": 0.9251861763429864, "No": 0.0748135844647189}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988843379142214, "res": {"Yes": 0.9988843379142214, "No": 0.001115601787427957}, "ground_truth": 1}, {"key": "35228910", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9920828004350947, "res": {"Yes": 0.9920828004350947, "No": 0.007917171268673424}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930932189107431, "res": {"Yes": 0.9930932189107431, "No": 0.0069067671611863545}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.02910314601986335, "res": {"No": 0.9708965422487593, "Yes": 0.02910314601986335}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.34862776566511705, "res": {"No": 0.6513721116486068, "Yes": 0.34862776566511705}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.35294967357104545, "res": {"No": 0.6470501214606514, "Yes": 0.35294967357104545}, "ground_truth": 1}, {"key": "36795599", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6284091072162551, "res": {"Yes": 0.6284091072162551, "No": 0.37159077404550706}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7934236919933036, "res": {"Yes": 0.7934236919933036, "No": 0.2065762469720378}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.20020130372686856, "res": {"No": 0.7997985304243714, "Yes": 0.20020130372686856}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998236499315017, "res": {"Yes": 0.9998236499315017, "No": 0.00017622076541775955}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9619373472847041, "res": {"Yes": 0.9619373472847041, "No": 0.038062307157289824}, "ground_truth": 1}, {"key": "38641949", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998981524498595, "res": {"Yes": 0.998981524498595, "No": 0.0010184500155864858}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974306199220148, "res": {"Yes": 0.9974306199220148, "No": 0.002569391958732949}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8744250712609817, "res": {"Yes": 0.8744250712609817, "No": 0.12557464705262356}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9961065912464033, "res": {"Yes": 0.9961065912464033, "No": 0.003893446593021347}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9506380169673764, "res": {"Yes": 0.9506380169673764, "No": 0.049361900366225064}, "ground_truth": 1}, {"key": "29968443", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6874224471154442, "res": {"Yes": 0.6874224471154442, "No": 0.312577231356195}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9269894307921462, "res": {"Yes": 0.9269894307921462, "No": 0.07301042790659551}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8883079638286164, "res": {"Yes": 0.8883079638286164, "No": 0.11169192135586582}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7858167548944638, "res": {"Yes": 0.7858167548944638, "No": 0.214183083359489}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.973179436572072, "res": {"Yes": 0.973179436572072, "No": 0.02682044524596495}, "ground_truth": 1}, {"key": "21268042", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9912920219934824, "res": {"Yes": 0.9912920219934824, "No": 0.008707859241628096}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.981517864704787, "res": {"Yes": 0.981517864704787, "No": 0.01848209783502699}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9297008883476704, "res": {"Yes": 0.9297008883476704, "No": 0.07029883866522847}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9962168354669463, "res": {"Yes": 0.9962168354669463, "No": 0.003783185119321316}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.961864548697193, "res": {"Yes": 0.961864548697193, "No": 0.038135289192756466}, "ground_truth": 1}, {"key": "26808572", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9887932289963721, "res": {"Yes": 0.9887932289963721, "No": 0.011206621163991686}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9939026475624736, "res": {"Yes": 0.9939026475624736, "No": 0.006097284932005903}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.1422751528735665, "res": {"No": 0.8577246086115083, "Yes": 0.1422751528735665}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981586110407354, "res": {"Yes": 0.9981586110407354, "No": 0.0018413910270831885}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9913032682652663, "res": {"Yes": 0.9913032682652663, "No": 0.008696670741069954}, "ground_truth": 1}, {"key": "37829390", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.963197501874285, "res": {"Yes": 0.963197501874285, "No": 0.036802444845316154}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.3728503299695375, "res": {"No": 0.6271496001991854, "Yes": 0.3728503299695375}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8544713204077344, "res": {"Yes": 0.8544713204077344, "No": 0.1455284905789488}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8965699529815132, "res": {"Yes": 0.8965699529815132, "No": 0.1034297955803665}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9732739417496487, "res": {"Yes": 0.9732739417496487, "No": 0.026725821059960495}, "ground_truth": 1}, {"key": "35716045", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9775615112426383, "res": {"Yes": 0.9775615112426383, "No": 0.022438467362813717}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8992036596100123, "res": {"Yes": 0.8992036596100123, "No": 0.10079629298132907}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3252290746051527, "res": {"No": 0.6747707064276528, "Yes": 0.3252290746051527}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.11314401530430408, "res": {"No": 0.8868558091796669, "Yes": 0.11314401530430408}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.16665947835969555, "res": {"No": 0.8333402973573939, "Yes": 0.16665947835969555}, "ground_truth": 1}, {"key": "34367070", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8609354485612605, "res": {"Yes": 0.8609354485612605, "No": 0.1390644617158997}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.20000770263514273, "res": {"No": 0.7999918970924038, "Yes": 0.20000770263514273}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9982025630281547, "res": {"Yes": 0.9982025630281547, "No": 0.001797380413371599}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994497412079287, "res": {"Yes": 0.9994497412079287, "No": 0.0005501463083124273}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974345360418024, "res": {"Yes": 0.9974345360418024, "No": 0.002565395810957972}, "ground_truth": 1}, {"key": "35239748", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9760743589718547, "res": {"Yes": 0.9760743589718547, "No": 0.023925560847885195}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8996830593114822, "res": {"Yes": 0.8996830593114822, "No": 0.1003167097683805}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9839669833489301, "res": {"Yes": 0.9839669833489301, "No": 0.016033002898372842}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7905043417008515, "res": {"Yes": 0.7905043417008515, "No": 0.2094955459977255}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9727147327295999, "res": {"Yes": 0.9727147327295999, "No": 0.027285114006352626}, "ground_truth": 1}, {"key": "40421370", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.990237216430445, "res": {"Yes": 0.990237216430445, "No": 0.009762683183475552}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991704655840672, "res": {"Yes": 0.9991704655840672, "No": 0.0008295227687053462}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.37172427342659764, "res": {"No": 0.6282752766133051, "Yes": 0.37172427342659764}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9756666832123743, "res": {"Yes": 0.9756666832123743, "No": 0.0243330809128544}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981210822800872, "res": {"Yes": 0.9981210822800872, "No": 0.0018788638874471246}, "ground_truth": 1}, {"key": "37288396", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9891958536135493, "res": {"Yes": 0.9891958536135493, "No": 0.01080400910833514}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8792632571044013, "res": {"Yes": 0.8792632571044013, "No": 0.12073652341587039}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7823623989641758, "res": {"Yes": 0.7823623989641758, "No": 0.21763740082031127}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9317846110768363, "res": {"Yes": 0.9317846110768363, "No": 0.06821525901061609}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8989822130209172, "res": {"Yes": 0.8989822130209172, "No": 0.10101780404527211}, "ground_truth": 1}, {"key": "38903688", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.38936413247752333, "res": {"No": 0.6106356714887686, "Yes": 0.38936413247752333}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.31834680776746144, "res": {"No": 0.6816529306582263, "Yes": 0.31834680776746144}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.05739434498127285, "res": {"No": 0.9426055203460016, "Yes": 0.05739434498127285}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.969208631772179, "res": {"Yes": 0.969208631772179, "No": 0.030791256782486866}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9613424883901776, "res": {"Yes": 0.9613424883901776, "No": 0.038657100809820925}, "ground_truth": 1}, {"key": "28071228", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9917421895906041, "res": {"Yes": 0.9917421895906041, "No": 0.008257751037590551}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9802289403486801, "res": {"Yes": 0.9802289403486801, "No": 0.019771030870253798}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9990518313362517, "res": {"Yes": 0.9990518313362517, "No": 0.0009481543049690378}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9984156866515381, "res": {"Yes": 0.9984156866515381, "No": 0.0015842835385483063}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9525861117794493, "res": {"Yes": 0.9525861117794493, "No": 0.047413812015293995}, "ground_truth": 1}, {"key": "36855834", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984626327615993, "res": {"Yes": 0.9984626327615993, "No": 0.0015373589535810563}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9829294652374936, "res": {"Yes": 0.9829294652374936, "No": 0.01707031993193879}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9422310608097683, "res": {"Yes": 0.9422310608097683, "No": 0.05776859442614584}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9915093826309909, "res": {"Yes": 0.9915093826309909, "No": 0.008490596686780828}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9959009317092464, "res": {"Yes": 0.9959009317092464, "No": 0.004099000973348396}, "ground_truth": 1}, {"key": "40548717", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976965795537147, "res": {"Yes": 0.9976965795537147, "No": 0.002303353901893944}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.983893473921583, "res": {"Yes": 0.983893473921583, "No": 0.016106338319888374}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9290981207437286, "res": {"Yes": 0.9290981207437286, "No": 0.07090183955584747}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9760700437566524, "res": {"Yes": 0.9760700437566524, "No": 0.023929834334663634}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9971101474858776, "res": {"Yes": 0.9971101474858776, "No": 0.0028898450332739693}, "ground_truth": 1}, {"key": "37051175", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989551143735482, "res": {"Yes": 0.9989551143735482, "No": 0.0010448159849793942}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9870669618798961, "res": {"Yes": 0.9870669618798961, "No": 0.012932936557958205}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9911287587392439, "res": {"Yes": 0.9911287587392439, "No": 0.008871171650575618}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9439535363652912, "res": {"Yes": 0.9439535363652912, "No": 0.056046336101212235}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9753169537913182, "res": {"Yes": 0.9753169537913182, "No": 0.02468292178997862}, "ground_truth": 1}, {"key": "38882119", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9407121003059034, "res": {"Yes": 0.9407121003059034, "No": 0.059287629708055976}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9865397168154388, "res": {"Yes": 0.9865397168154388, "No": 0.013460127261745988}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9760757254769138, "res": {"Yes": 0.9760757254769138, "No": 0.023924218661570303}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9866492505080914, "res": {"Yes": 0.9866492505080914, "No": 0.01335065008254145}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975524348839289, "res": {"Yes": 0.9975524348839289, "No": 0.002447591008454141}, "ground_truth": 1}, {"key": "19485402", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9938118635721412, "res": {"Yes": 0.9938118635721412, "No": 0.006188107339657882}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9840459360911898, "res": {"Yes": 0.9840459360911898, "No": 0.015954052608293242}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9676251513919676, "res": {"Yes": 0.9676251513919676, "No": 0.032374538949691764}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974903957353101, "res": {"Yes": 0.9974903957353101, "No": 0.0025095621080560215}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9853203660202773, "res": {"Yes": 0.9853203660202773, "No": 0.014679420970501624}, "ground_truth": 1}, {"key": "36060907", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993079384451427, "res": {"Yes": 0.9993079384451427, "No": 0.0006920566787448797}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980836702091024, "res": {"Yes": 0.9980836702091024, "No": 0.0019163238302773212}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8770230712003909, "res": {"Yes": 0.8770230712003909, "No": 0.1229762047059835}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994235421557037, "res": {"Yes": 0.9994235421557037, "No": 0.000576322187701434}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995501246942554, "res": {"Yes": 0.9995501246942554, "No": 0.0004496527229104703}, "ground_truth": 1}, {"key": "24037309", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999708403221517, "res": {"Yes": 0.9999708403221517, "No": 2.912606733659039e-05}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980513644602839, "res": {"Yes": 0.9980513644602839, "No": 0.0019484813591511333}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9890845783067919, "res": {"Yes": 0.9890845783067919, "No": 0.01091529786328338}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9796675446750192, "res": {"Yes": 0.9796675446750192, "No": 0.020332432657312906}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8482838673268992, "res": {"Yes": 0.8482838673268992, "No": 0.15171574615876804}, "ground_truth": 1}, {"key": "35605805", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8822328196072893, "res": {"Yes": 0.8822328196072893, "No": 0.11776697442494964}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9706069553552182, "res": {"Yes": 0.9706069553552182, "No": 0.029392865622117764}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.911926202949828, "res": {"Yes": 0.911926202949828, "No": 0.08807352517488035}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978712779493313, "res": {"Yes": 0.9978712779493313, "No": 0.002128709609453814}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999840716318578, "res": {"Yes": 0.9999840716318578, "No": 1.5879445831736812e-05}, "ground_truth": 1}, {"key": "17706248", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993453212257274, "res": {"Yes": 0.9993453212257274, "No": 0.000654613744058587}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923656428775806, "res": {"Yes": 0.9923656428775806, "No": 0.007634338381946842}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9846422891414487, "res": {"Yes": 0.9846422891414487, "No": 0.015357669316237334}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9916033801927656, "res": {"Yes": 0.9916033801927656, "No": 0.008396606020161512}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9415173481304884, "res": {"Yes": 0.9415173481304884, "No": 0.058482641223498166}, "ground_truth": 1}, {"key": "36883559", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9793054471318509, "res": {"Yes": 0.9793054471318509, "No": 0.020694581008748436}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.4500599872959482, "res": {"No": 0.5499398337067247, "Yes": 0.4500599872959482}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8388839261000586, "res": {"Yes": 0.8388839261000586, "No": 0.16111594788487044}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9370958569517043, "res": {"Yes": 0.9370958569517043, "No": 0.06290400152812199}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9933130110843436, "res": {"Yes": 0.9933130110843436, "No": 0.006686943400871677}, "ground_truth": 1}, {"key": "32799471", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9719319055444152, "res": {"Yes": 0.9719319055444152, "No": 0.028067807498867038}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8477614429867122, "res": {"Yes": 0.8477614429867122, "No": 0.15223839478902623}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8313235248302636, "res": {"Yes": 0.8313235248302636, "No": 0.1686759939150775}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978496732721748, "res": {"Yes": 0.9978496732721748, "No": 0.0021503492675777093}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9965971016271491, "res": {"Yes": 0.9965971016271491, "No": 0.003402888327158209}, "ground_truth": 1}, {"key": "34797243", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999667875255465, "res": {"Yes": 0.9999667875255465, "No": 3.3100340123487065e-05}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994871329612839, "res": {"Yes": 0.9994871329612839, "No": 0.000512811821385342}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9234639097183355, "res": {"Yes": 0.9234639097183355, "No": 0.07653591279541122}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9842159988425998, "res": {"Yes": 0.9842159988425998, "No": 0.015783913350210454}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9735724838327091, "res": {"Yes": 0.9735724838327091, "No": 0.026427392818772627}, "ground_truth": 1}, {"key": "32154876", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.982012858026552, "res": {"Yes": 0.982012858026552, "No": 0.0179871547650642}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995703767345164, "res": {"Yes": 0.9995703767345164, "No": 0.00042957809730339013}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9146497809823548, "res": {"Yes": 0.9146497809823548, "No": 0.08535002814345767}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9959769691582278, "res": {"Yes": 0.9959769691582278, "No": 0.004023077990692238}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984776073172242, "res": {"Yes": 0.9984776073172242, "No": 0.001522378141510829}, "ground_truth": 1}, {"key": "37962274", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994861797508597, "res": {"Yes": 0.9994861797508597, "No": 0.0005137668472452573}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9970865605260556, "res": {"Yes": 0.9970865605260556, "No": 0.0029133743474785965}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.41723340063986336, "res": {"No": 0.5827664926797483, "Yes": 0.41723340063986336}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946335401698234, "res": {"Yes": 0.9946335401698234, "No": 0.0053664589573940704}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.051243848368610845, "res": {"No": 0.9487561102280179, "Yes": 0.051243848368610845}, "ground_truth": 1}, {"key": "35574030", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.10852728098033348, "res": {"No": 0.8914725080606816, "Yes": 0.10852728098033348}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9190500653833917, "res": {"Yes": 0.9190500653833917, "No": 0.08094974896298426}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5378643113592005, "res": {"Yes": 0.5378643113592005, "No": 0.4621352878143351}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9551296199645583, "res": {"Yes": 0.9551296199645583, "No": 0.0448701193092304}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9435388203263372, "res": {"Yes": 0.9435388203263372, "No": 0.05646108156697189}, "ground_truth": 1}, {"key": "39105949", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958250374891583, "res": {"Yes": 0.9958250374891583, "No": 0.004174906149891}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9732781200236492, "res": {"Yes": 0.9732781200236492, "No": 0.026721547942817822}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7318571019450476, "res": {"Yes": 0.7318571019450476, "No": 0.26814255989351005}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953333806625403, "res": {"Yes": 0.9953333806625403, "No": 0.0046666654441634365}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955784987455267, "res": {"Yes": 0.9955784987455267, "No": 0.004421439310679737}, "ground_truth": 1}, {"key": "41064322", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9679643508674345, "res": {"Yes": 0.9679643508674345, "No": 0.03203545566559372}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997942159164498, "res": {"Yes": 0.9997942159164498, "No": 0.00020575220532407864}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9270347106967328, "res": {"Yes": 0.9270347106967328, "No": 0.07296471579184056}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9947871055385813, "res": {"Yes": 0.9947871055385813, "No": 0.0052128601537474485}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9795048755689263, "res": {"Yes": 0.9795048755689263, "No": 0.02049490418662847}, "ground_truth": 1}, {"key": "28105101", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958907661023688, "res": {"Yes": 0.9958907661023688, "No": 0.004109276500135889}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980828375082437, "res": {"Yes": 0.9980828375082437, "No": 0.0019171568456150814}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9973908886630692, "res": {"Yes": 0.9973908886630692, "No": 0.0026091024648190945}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974176929059781, "res": {"Yes": 0.9974176929059781, "No": 0.002582299494660287}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990155496894662, "res": {"Yes": 0.9990155496894662, "No": 0.000984407618741477}, "ground_truth": 1}, {"key": "36036068", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9869316768444009, "res": {"Yes": 0.9869316768444009, "No": 0.013068271624382496}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9912896775906227, "res": {"Yes": 0.9912896775906227, "No": 0.008710230714560976}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.02586694448808507, "res": {"No": 0.974132914620888, "Yes": 0.02586694448808507}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999450934134217, "res": {"Yes": 0.9999450934134217, "No": 5.4781665015708546e-05}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995047551742229, "res": {"Yes": 0.9995047551742229, "No": 0.0004952375557053237}, "ground_truth": 1}, {"key": "37991460", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9707184806187414, "res": {"Yes": 0.9707184806187414, "No": 0.02928130562105922}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983492730509439, "res": {"Yes": 0.9983492730509439, "No": 0.0016507195421104877}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.36321337772137985, "res": {"No": 0.636786082539098, "Yes": 0.36321337772137985}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9854038035254797, "res": {"Yes": 0.9854038035254797, "No": 0.014596123488636611}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9622463086412217, "res": {"Yes": 0.9622463086412217, "No": 0.03775345440713055}, "ground_truth": 1}, {"key": "38437830", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7986158318852465, "res": {"Yes": 0.7986158318852465, "No": 0.20138400921256486}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.968981140625068, "res": {"Yes": 0.968981140625068, "No": 0.031018604790048393}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6741718231968358, "res": {"Yes": 0.6741718231968358, "No": 0.32582791665683564}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9319236390527674, "res": {"Yes": 0.9319236390527674, "No": 0.06807595580845896}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9880066603619075, "res": {"Yes": 0.9880066603619075, "No": 0.011993193481422202}, "ground_truth": 1}, {"key": "36507138", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9940769587826582, "res": {"Yes": 0.9940769587826582, "No": 0.005923030809674062}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9833805721457356, "res": {"Yes": 0.9833805721457356, "No": 0.01661934334937372}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9916029111644777, "res": {"Yes": 0.9916029111644777, "No": 0.008396857922120625}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9920214330832237, "res": {"Yes": 0.9920214330832237, "No": 0.007978471717175521}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9982473459949915, "res": {"Yes": 0.9982473459949915, "No": 0.0017526108299491677}, "ground_truth": 1}, {"key": "37824866", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9123142827436976, "res": {"Yes": 0.9123142827436976, "No": 0.08768517816390238}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9961249086197052, "res": {"Yes": 0.9961249086197052, "No": 0.003875115926919239}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9671795600703816, "res": {"Yes": 0.9671795600703816, "No": 0.032820357520716}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953435370961746, "res": {"Yes": 0.9953435370961746, "No": 0.0046564715302579605}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999240212085124, "res": {"Yes": 0.999240212085124, "No": 0.0007597371802138203}, "ground_truth": 1}, {"key": "25088134", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991940302382499, "res": {"Yes": 0.9991940302382499, "No": 0.0008059391617550648}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9965483262691628, "res": {"Yes": 0.9965483262691628, "No": 0.0034516047885119684}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8205030756106234, "res": {"Yes": 0.8205030756106234, "No": 0.17949683563205004}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7868762150803498, "res": {"Yes": 0.7868762150803498, "No": 0.2131237065364409}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9106092071973205, "res": {"Yes": 0.9106092071973205, "No": 0.08939059532886798}, "ground_truth": 1}, {"key": "40172531", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9009438256390083, "res": {"Yes": 0.9009438256390083, "No": 0.0990561418686259}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.586337744284398, "res": {"Yes": 0.586337744284398, "No": 0.41366212977989225}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9249292138180992, "res": {"Yes": 0.9249292138180992, "No": 0.07507035788936287}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976036782740123, "res": {"Yes": 0.9976036782740123, "No": 0.002396329709237792}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999079046010416, "res": {"Yes": 0.9999079046010416, "No": 9.20453654254272e-05}, "ground_truth": 1}, {"key": "37035874", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994113977942826, "res": {"Yes": 0.9994113977942826, "No": 0.0005885097875802202}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9944029103998951, "res": {"Yes": 0.9944029103998951, "No": 0.005597058987501936}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9996239747452238, "res": {"Yes": 0.9996239747452238, "No": 0.00037593312102854033}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992347402207351, "res": {"Yes": 0.9992347402207351, "No": 0.0007652421792911432}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981878229791985, "res": {"Yes": 0.9981878229791985, "No": 0.0018121215373825582}, "ground_truth": 1}, {"key": "36404465", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9969178353519955, "res": {"Yes": 0.9969178353519955, "No": 0.0030821126247613036}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984396950434766, "res": {"Yes": 0.9984396950434766, "No": 0.001560270617513949}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.2831111223297579, "res": {"No": 0.71688858569747, "Yes": 0.2831111223297579}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.3864729617747715, "res": {"No": 0.6135267889587868, "Yes": 0.3864729617747715}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7876625711756039, "res": {"Yes": 0.7876625711756039, "No": 0.21233724286723757}, "ground_truth": 1}, {"key": "39602052", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8520842773221607, "res": {"Yes": 0.8520842773221607, "No": 0.14791536897882598}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.29800270957038466, "res": {"No": 0.7019971323051981, "Yes": 0.29800270957038466}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.996844497496194, "res": {"Yes": 0.996844497496194, "No": 0.0031554773339355115}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982576766107552, "res": {"Yes": 0.9982576766107552, "No": 0.0017423038322547666}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995047551742229, "res": {"Yes": 0.9995047551742229, "No": 0.0004951203468481036}, "ground_truth": 1}, {"key": "33792789", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9931106233198408, "res": {"Yes": 0.9931106233198408, "No": 0.00688925725639761}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994767712319385, "res": {"Yes": 0.9994767712319385, "No": 0.0005231261073106014}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9384201961269966, "res": {"Yes": 0.9384201961269966, "No": 0.0615797005055228}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9924927916185625, "res": {"Yes": 0.9924927916185625, "No": 0.0075071415862924575}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7985159472639088, "res": {"Yes": 0.7985159472639088, "No": 0.2014840614958683}, "ground_truth": 1}, {"key": "32776626", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9607521132076733, "res": {"Yes": 0.9607521132076733, "No": 0.03924774062911474}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8725456023217867, "res": {"Yes": 0.8725456023217867, "No": 0.12745437774817697}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7364559535201718, "res": {"Yes": 0.7364559535201718, "No": 0.2635438002386296}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8518112110335346, "res": {"Yes": 0.8518112110335346, "No": 0.14818868775076574}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9793097942783792, "res": {"Yes": 0.9793097942783792, "No": 0.02069017560142392}, "ground_truth": 1}, {"key": "37195090", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9467456063330155, "res": {"Yes": 0.9467456063330155, "No": 0.05325425880670155}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978170326464523, "res": {"Yes": 0.9978170326464523, "No": 0.0021829240360092105}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8001908558139611, "res": {"Yes": 0.8001908558139611, "No": 0.19980879394450476}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999144602247352, "res": {"Yes": 0.9999144602247352, "No": 8.54781303602861e-05}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9955943355386638, "res": {"Yes": 0.9955943355386638, "No": 0.00440556528407928}, "ground_truth": 1}, {"key": "33981824", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998539191008537, "res": {"Yes": 0.9998539191008537, "No": 0.0001459678742283553}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989461906474744, "res": {"Yes": 0.9989461906474744, "No": 0.0010536963113808152}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.45457513295260565, "res": {"No": 0.5454246027693256, "Yes": 0.45457513295260565}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9752856485701556, "res": {"Yes": 0.9752856485701556, "No": 0.024713688464697188}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9824274541304973, "res": {"Yes": 0.9824274541304973, "No": 0.017572334800196557}, "ground_truth": 1}, {"key": "39569142", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9816290396770101, "res": {"Yes": 0.9816290396770101, "No": 0.01837080425170608}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.46546872516001925, "res": {"No": 0.5345304612303182, "Yes": 0.46546872516001925}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9102746197978758, "res": {"Yes": 0.9102746197978758, "No": 0.08972529088429762}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8184462199088264, "res": {"Yes": 0.8184462199088264, "No": 0.18155382731800332}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7642364562429146, "res": {"Yes": 0.7642364562429146, "No": 0.23576345168197121}, "ground_truth": 1}, {"key": "40268210", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3978853661235555, "res": {"No": 0.602114531564871, "Yes": 0.3978853661235555}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9543939531301382, "res": {"Yes": 0.9543939531301382, "No": 0.04560594035515217}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.05805392842507804, "res": {"No": 0.9419457163703716, "Yes": 0.05805392842507804}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.4125131479075849, "res": {"No": 0.5874859782835751, "Yes": 0.4125131479075849}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.14146431280971028, "res": {"No": 0.8585355843988525, "Yes": 0.14146431280971028}, "ground_truth": 1}, {"key": "34925159", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8623987657576367, "res": {"Yes": 0.8623987657576367, "No": 0.1376012595250107}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7807179374567581, "res": {"Yes": 0.7807179374567581, "No": 0.2192819794467766}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9948163650451383, "res": {"Yes": 0.9948163650451383, "No": 0.005183664709371764}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9452060404800033, "res": {"Yes": 0.9452060404800033, "No": 0.0547936865832506}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9604995387425822, "res": {"Yes": 0.9604995387425822, "No": 0.03950038738136445}, "ground_truth": 1}, {"key": "36181903", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5352132571581673, "res": {"Yes": 0.5352132571581673, "No": 0.4647867410460907}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.3967178723175009, "res": {"No": 0.6032817163869767, "Yes": 0.3967178723175009}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.996255522107225, "res": {"Yes": 0.996255522107225, "No": 0.003744465858833831}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996013458336972, "res": {"Yes": 0.9996013458336972, "No": 0.00039857921362597957}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998434344376671, "res": {"Yes": 0.9998434344376671, "No": 0.00015650165076821243}, "ground_truth": 1}, {"key": "38620559", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999497421129699, "res": {"Yes": 0.9999497421129699, "No": 5.0198671287167634e-05}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975692692220872, "res": {"Yes": 0.9975692692220872, "No": 0.0024307252727732813}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8809214033696167, "res": {"Yes": 0.8809214033696167, "No": 0.1190783856236828}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7936360564569763, "res": {"Yes": 0.7936360564569763, "No": 0.20636370679660151}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9672232833778447, "res": {"Yes": 0.9672232833778447, "No": 0.03277654863345421}, "ground_truth": 1}, {"key": "32719657", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9867090220501302, "res": {"Yes": 0.9867090220501302, "No": 0.01329083472136789}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8963228833857616, "res": {"Yes": 0.8963228833857616, "No": 0.10367697739654906}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.04611285580815917, "res": {"No": 0.9538869314982873, "Yes": 0.04611285580815917}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9874996733418284, "res": {"Yes": 0.9874996733418284, "No": 0.012500166684534865}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9809998444298677, "res": {"Yes": 0.9809998444298677, "No": 0.01900013869700446}, "ground_truth": 1}, {"key": "37530914", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947998508326236, "res": {"Yes": 0.9947998508326236, "No": 0.005200175000597392}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9752042458497042, "res": {"Yes": 0.9752042458497042, "No": 0.02479566845843304}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9954119232291075, "res": {"Yes": 0.9954119232291075, "No": 0.004587971002624117}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9865284540490525, "res": {"Yes": 0.9865284540490525, "No": 0.013471274168161285}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991047805082782, "res": {"Yes": 0.9991047805082782, "No": 0.0008950884658939282}, "ground_truth": 1}, {"key": "33306933", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977676669697348, "res": {"Yes": 0.9977676669697348, "No": 0.00223234762638958}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938782686222064, "res": {"Yes": 0.9938782686222064, "No": 0.006121604767004047}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9931951719156933, "res": {"Yes": 0.9931951719156933, "No": 0.006804822683599794}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.911078287162532, "res": {"Yes": 0.911078287162532, "No": 0.08892159078290511}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9904999523010191, "res": {"Yes": 0.9904999523010191, "No": 0.009499807915510543}, "ground_truth": 1}, {"key": "33837212", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9965304543317376, "res": {"Yes": 0.9965304543317376, "No": 0.0034695606738443146}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8462996549687031, "res": {"Yes": 0.8462996549687031, "No": 0.15369988141090288}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9997091513046849, "res": {"Yes": 0.9997091513046849, "No": 0.000290722893642548}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9855744488511659, "res": {"Yes": 0.9855744488511659, "No": 0.01442538178558378}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9418394256776295, "res": {"Yes": 0.9418394256776295, "No": 0.05816040978266335}, "ground_truth": 1}, {"key": "40945179", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9935578387731188, "res": {"Yes": 0.9935578387731188, "No": 0.006442154146491311}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.958606049031068, "res": {"Yes": 0.958606049031068, "No": 0.04139363309307952}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9030327935507865, "res": {"Yes": 0.9030327935507865, "No": 0.0969670062589848}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968719852640834, "res": {"Yes": 0.9968719852640834, "No": 0.0031279638915119113}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9514660056492796, "res": {"Yes": 0.9514660056492796, "No": 0.04853385904379504}, "ground_truth": 1}, {"key": "34152358", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9973492680107583, "res": {"Yes": 0.9973492680107583, "No": 0.002650686277155737}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971824605358127, "res": {"Yes": 0.9971824605358127, "No": 0.002817533562401738}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9252808924998224, "res": {"Yes": 0.9252808924998224, "No": 0.07471884073279202}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9958195989051497, "res": {"Yes": 0.9958195989051497, "No": 0.004180397254497806}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994635539386676, "res": {"Yes": 0.9994635539386676, "No": 0.0005363671768413324}, "ground_truth": 1}, {"key": "34136541", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998879106171314, "res": {"Yes": 0.998879106171314, "No": 0.0011208426617567184}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952801128407737, "res": {"Yes": 0.9952801128407737, "No": 0.004719909751558462}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4333021016124739, "res": {"No": 0.5666976004876596, "Yes": 0.4333021016124739}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.1713496796891507, "res": {"No": 0.8286501727011059, "Yes": 0.1713496796891507}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6847870123122349, "res": {"Yes": 0.6847870123122349, "No": 0.3152128039987537}, "ground_truth": 1}, {"key": "37469603", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5044401391722223, "res": {"Yes": 0.5044401391722223, "No": 0.4955597733976097}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.19676766730712664, "res": {"No": 0.8032321126138084, "Yes": 0.19676766730712664}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.23338472660062534, "res": {"No": 0.7666151987896195, "Yes": 0.23338472660062534}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.789667759164513, "res": {"Yes": 0.789667759164513, "No": 0.21033188311293297}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2628011280376989, "res": {"No": 0.737198638530322, "Yes": 0.2628011280376989}, "ground_truth": 1}, {"key": "37353611", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.42698495524179675, "res": {"No": 0.5730147848899597, "Yes": 0.42698495524179675}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.20421843463003544, "res": {"No": 0.795781259917948, "Yes": 0.20421843463003544}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9891603905886978, "res": {"Yes": 0.9891603905886978, "No": 0.010839536957291675}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8688200258991782, "res": {"Yes": 0.8688200258991782, "No": 0.13117971837213102}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8684748948947535, "res": {"Yes": 0.8684748948947535, "No": 0.13152438595924998}, "ground_truth": 1}, {"key": "37211649", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9924268099136524, "res": {"Yes": 0.9924268099136524, "No": 0.007573138502730991}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.994478591318762, "res": {"Yes": 0.994478591318762, "No": 0.005521393527054832}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9972774146762169, "res": {"Yes": 0.9972774146762169, "No": 0.0027225364903399074}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9933564085864742, "res": {"Yes": 0.9933564085864742, "No": 0.006643604608928879}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997767004150644, "res": {"Yes": 0.9997767004150644, "No": 0.00022318043234621443}, "ground_truth": 1}, {"key": "37320976", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9948013813333715, "res": {"Yes": 0.9948013813333715, "No": 0.0051986167416664805}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998678640007302, "res": {"Yes": 0.9998678640007302, "No": 0.00013202529829382977}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8598494441518054, "res": {"Yes": 0.8598494441518054, "No": 0.14015040662778316}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970128611040899, "res": {"Yes": 0.9970128611040899, "No": 0.002987138420034917}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9863980766520825, "res": {"Yes": 0.9863980766520825, "No": 0.01360183321640917}, "ground_truth": 1}, {"key": "34492412", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9950778761488528, "res": {"Yes": 0.9950778761488528, "No": 0.004922136966465007}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930813498294466, "res": {"Yes": 0.9930813498294466, "No": 0.006918657023053558}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5088148098348606, "res": {"Yes": 0.5088148098348606, "No": 0.49118511792113784}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6059628490439906, "res": {"Yes": 0.6059628490439906, "No": 0.39403686818042116}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9569232684517316, "res": {"Yes": 0.9569232684517316, "No": 0.0430766410409316}, "ground_truth": 1}, {"key": "36655016", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9145738864924349, "res": {"Yes": 0.9145738864924349, "No": 0.08542568177800389}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.21819716463666897, "res": {"No": 0.7818026390667366, "Yes": 0.21819716463666897}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9733398959463653, "res": {"Yes": 0.9733398959463653, "No": 0.026660015042418012}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983174273152424, "res": {"Yes": 0.9983174273152424, "No": 0.0016825168191034361}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998619052586619, "res": {"Yes": 0.998619052586619, "No": 0.0013809062395734187}, "ground_truth": 1}, {"key": "35220773", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998796919981736, "res": {"Yes": 0.998796919981736, "No": 0.001202985855196641}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.993870615192679, "res": {"Yes": 0.993870615192679, "No": 0.006129316721062113}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.1513386254933452, "res": {"No": 0.8486611321329635, "Yes": 0.1513386254933452}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9659675021521051, "res": {"Yes": 0.9659675021521051, "No": 0.03403238866550085}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9408328983169665, "res": {"Yes": 0.9408328983169665, "No": 0.05916691272565257}, "ground_truth": 1}, {"key": "31569808", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9822714590018212, "res": {"Yes": 0.9822714590018212, "No": 0.01772849583103503}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9871816124172224, "res": {"Yes": 0.9871816124172224, "No": 0.012818215932566296}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9689334679257073, "res": {"Yes": 0.9689334679257073, "No": 0.031066459948471403}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9624536402059596, "res": {"Yes": 0.9624536402059596, "No": 0.037546136332775946}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992480698912377, "res": {"Yes": 0.9992480698912377, "No": 0.0007518520465354365}, "ground_truth": 1}, {"key": "37696256", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9922321856199077, "res": {"Yes": 0.9922321856199077, "No": 0.007767639169191223}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986910939657258, "res": {"Yes": 0.9986910939657258, "No": 0.001308882184901403}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.26287798232308607, "res": {"No": 0.7371217232371184, "Yes": 0.26287798232308607}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9871641888155269, "res": {"Yes": 0.9871641888155269, "No": 0.01283575426656059}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5709013793028733, "res": {"Yes": 0.5709013793028733, "No": 0.42909844965791744}, "ground_truth": 1}, {"key": "36874328", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.21002670283315383, "res": {"No": 0.7899730879400474, "Yes": 0.21002670283315383}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.34025559288851986, "res": {"No": 0.6597441834048855, "Yes": 0.34025559288851986}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.999962734742367, "res": {"Yes": 0.999962734742367, "No": 3.7151626715252886e-05}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993872322788702, "res": {"Yes": 0.9993872322788702, "No": 0.0006126884435041422}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989739105901252, "res": {"Yes": 0.9989739105901252, "No": 0.0010258938475634046}, "ground_truth": 1}, {"key": "24532377", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997799181915551, "res": {"Yes": 0.9997799181915551, "No": 0.00022003388516390833}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999338889494318, "res": {"Yes": 0.9999338889494318, "No": 6.59529466327482e-05}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.06437605060845945, "res": {"No": 0.9356236936890004, "Yes": 0.06437605060845945}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980533868138126, "res": {"Yes": 0.9980533868138126, "No": 0.0019465396913183105}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.992007717489706, "res": {"Yes": 0.992007717489706, "No": 0.007992069032284131}, "ground_truth": 1}, {"key": "39560618", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964387141743346, "res": {"Yes": 0.9964387141743346, "No": 0.003561280883835779}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977019167458426, "res": {"Yes": 0.9977019167458426, "No": 0.0022980737659626283}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7136569459224255, "res": {"Yes": 0.7136569459224255, "No": 0.2863430532740029}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9966913626439234, "res": {"Yes": 0.9966913626439234, "No": 0.0033085994699337297}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9949906521418376, "res": {"Yes": 0.9949906521418376, "No": 0.005009328023134862}, "ground_truth": 1}, {"key": "34922693", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9644220432294033, "res": {"Yes": 0.9644220432294033, "No": 0.03557788649494261}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958926578982649, "res": {"Yes": 0.9958926578982649, "No": 0.004107309860228089}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9869237824094916, "res": {"Yes": 0.9869237824094916, "No": 0.013076049559705294}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996242130456116, "res": {"Yes": 0.9996242130456116, "No": 0.00037569534337175914}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990924055132495, "res": {"Yes": 0.9990924055132495, "No": 0.0009074854726977094}, "ground_truth": 1}, {"key": "33629577", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986742148307111, "res": {"Yes": 0.9986742148307111, "No": 0.0013257040528825053}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9973927870986935, "res": {"Yes": 0.9973927870986935, "No": 0.0026071776575413044}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8669794976012023, "res": {"Yes": 0.8669794976012023, "No": 0.13302003806046758}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9943160381844789, "res": {"Yes": 0.9943160381844789, "No": 0.005683989022770156}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9833413753773267, "res": {"Yes": 0.9833413753773267, "No": 0.01665860132249146}, "ground_truth": 1}, {"key": "32284359", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990124571417172, "res": {"Yes": 0.9990124571417172, "No": 0.000987545486471707}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7034855519662465, "res": {"Yes": 0.7034855519662465, "No": 0.29651407238534067}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9992954417877803, "res": {"Yes": 0.9992954417877803, "No": 0.000704489874030465}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990603949655179, "res": {"Yes": 0.9990603949655179, "No": 0.000939596528353087}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972745727388177, "res": {"Yes": 0.9972745727388177, "No": 0.0027254070115053865}, "ground_truth": 1}, {"key": "28082962", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9960812800014535, "res": {"Yes": 0.9960812800014535, "No": 0.003918753344979978}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9954864621659871, "res": {"Yes": 0.9954864621659871, "No": 0.004513510711568551}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.996037279381598, "res": {"Yes": 0.996037279381598, "No": 0.00396274425515109}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9900018462066879, "res": {"Yes": 0.9900018462066879, "No": 0.009998042051584036}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995650148634783, "res": {"Yes": 0.9995650148634783, "No": 0.00043495629638643776}, "ground_truth": 1}, {"key": "24796803", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996202808615674, "res": {"Yes": 0.9996202808615674, "No": 0.00037959639048750025}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993491332557126, "res": {"Yes": 0.9993491332557126, "No": 0.0006508538771587151}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.012445427407997554, "res": {"No": 0.9875544297417682, "Yes": 0.012445427407997554}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9094375613521469, "res": {"Yes": 0.9094375613521469, "No": 0.09056219742224865}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5347850973394673, "res": {"Yes": 0.5347850973394673, "No": 0.4652146067359768}, "ground_truth": 1}, {"key": "35466150", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.042960435134658116, "res": {"No": 0.9570395225515141, "Yes": 0.042960435134658116}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5690771643947687, "res": {"Yes": 0.5690771643947687, "No": 0.4309225687028781}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.981531989830015, "res": {"Yes": 0.981531989830015, "No": 0.01846784254873419}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9307115349722734, "res": {"Yes": 0.9307115349722734, "No": 0.06928815575636978}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9889903586414664, "res": {"Yes": 0.9889903586414664, "No": 0.011009513679746395}, "ground_truth": 1}, {"key": "35754289", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.984535842263892, "res": {"Yes": 0.984535842263892, "No": 0.01546405469744166}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9517932234890253, "res": {"Yes": 0.9517932234890253, "No": 0.04820654879685597}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.07010363722876352, "res": {"No": 0.9298960809428352, "Yes": 0.07010363722876352}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.18473413419496346, "res": {"No": 0.8152655260237763, "Yes": 0.18473413419496346}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5219393216009051, "res": {"Yes": 0.5219393216009051, "No": 0.47806034963193}, "ground_truth": 1}, {"key": "36678662", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.0924736814327538, "res": {"No": 0.9075259610814721, "Yes": 0.0924736814327538}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7212478226648122, "res": {"Yes": 0.7212478226648122, "No": 0.2787518832992886}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9733723116246866, "res": {"Yes": 0.9733723116246866, "No": 0.026627560248699906}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9886594345493608, "res": {"Yes": 0.9886594345493608, "No": 0.011340430709119163}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9978981096717077, "res": {"Yes": 0.9978981096717077, "No": 0.0021018156764026543}, "ground_truth": 1}, {"key": "35399671", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9813044185778439, "res": {"Yes": 0.9813044185778439, "No": 0.01869553600367318}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9952476441482156, "res": {"Yes": 0.9952476441482156, "No": 0.004752312202027606}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7504805307129577, "res": {"Yes": 0.7504805307129577, "No": 0.24951935291627872}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9555033932749041, "res": {"Yes": 0.9555033932749041, "No": 0.04449658652746818}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8350262980137345, "res": {"Yes": 0.8350262980137345, "No": 0.16497366893966442}, "ground_truth": 1}, {"key": "36888180", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.88513201111192, "res": {"Yes": 0.88513201111192, "No": 0.11486788926302019}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8735274452840656, "res": {"Yes": 0.8735274452840656, "No": 0.12647254878700706}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9428359476884287, "res": {"Yes": 0.9428359476884287, "No": 0.05716383704639529}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9865887914811228, "res": {"Yes": 0.9865887914811228, "No": 0.013410977379059537}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981023201755728, "res": {"Yes": 0.9981023201755728, "No": 0.0018976272291630198}, "ground_truth": 1}, {"key": "28061069", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979384668843658, "res": {"Yes": 0.9979384668843658, "No": 0.0020613406642003484}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956459658110539, "res": {"Yes": 0.9956459658110539, "No": 0.00435391690979607}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9818497503999155, "res": {"Yes": 0.9818497503999155, "No": 0.01815012193703329}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9915706746778427, "res": {"Yes": 0.9915706746778427, "No": 0.008429091353717132}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9866687439298988, "res": {"Yes": 0.9866687439298988, "No": 0.013331161690687874}, "ground_truth": 1}, {"key": "22259982", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9968050640050149, "res": {"Yes": 0.9968050640050149, "No": 0.0031949590759947767}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9498495360848996, "res": {"Yes": 0.9498495360848996, "No": 0.05015042951523505}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9976797355845508, "res": {"Yes": 0.9976797355845508, "No": 0.002320228515718541}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9608045976468822, "res": {"Yes": 0.9608045976468822, "No": 0.0391952701070589}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9633240367720222, "res": {"Yes": 0.9633240367720222, "No": 0.036675812541905516}, "ground_truth": 1}, {"key": "34026805", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979981873049779, "res": {"Yes": 0.9979981873049779, "No": 0.0020017781868632823}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.821000303383947, "res": {"Yes": 0.821000303383947, "No": 0.1789996013927512}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9956821346020369, "res": {"Yes": 0.9956821346020369, "No": 0.004317884481614199}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.998704766140388, "res": {"Yes": 0.998704766140388, "No": 0.0012951670448334833}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994766520943144, "res": {"Yes": 0.9994766520943144, "No": 0.0005232738589848603}, "ground_truth": 1}, {"key": "36713809", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9950532058869218, "res": {"Yes": 0.9950532058869218, "No": 0.004946653458644108}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995625126754997, "res": {"Yes": 0.9995625126754997, "No": 0.0004373943355849085}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.021687810594876896, "res": {"No": 0.9783120258450108, "Yes": 0.021687810594876896}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.974238927481084, "res": {"Yes": 0.974238927481084, "No": 0.02576083921640637}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969171258458246, "res": {"Yes": 0.9969171258458246, "No": 0.0030827587032741157}, "ground_truth": 1}, {"key": "39726411", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9610556781202512, "res": {"Yes": 0.9610556781202512, "No": 0.03894406862628235}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9464122335463846, "res": {"Yes": 0.9464122335463846, "No": 0.05358717563895448}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.032891500435936506, "res": {"No": 0.9671079807928209, "Yes": 0.032891500435936506}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6048300920384442, "res": {"Yes": 0.6048300920384442, "No": 0.3951696352270316}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8498137169886564, "res": {"Yes": 0.8498137169886564, "No": 0.15018597793030863}, "ground_truth": 1}, {"key": "37069841", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8711388503211359, "res": {"Yes": 0.8711388503211359, "No": 0.12886086791002643}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.14338482203040492, "res": {"No": 0.8566150452839479, "Yes": 0.14338482203040492}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3213692517440749, "res": {"No": 0.6786305557603864, "Yes": 0.3213692517440749}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.4759411342215363, "res": {"No": 0.5240587933004992, "Yes": 0.4759411342215363}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9380740955237691, "res": {"Yes": 0.9380740955237691, "No": 0.06192582383132571}, "ground_truth": 1}, {"key": "38894693", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9152027898602159, "res": {"Yes": 0.9152027898602159, "No": 0.0847971792753412}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8566732028535087, "res": {"Yes": 0.8566732028535087, "No": 0.1433267486025674}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7116106051893546, "res": {"Yes": 0.7116106051893546, "No": 0.2883888603531191}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9489440461158367, "res": {"Yes": 0.9489440461158367, "No": 0.051055740097349235}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8745640022077404, "res": {"Yes": 0.8745640022077404, "No": 0.12543563757121}, "ground_truth": 1}, {"key": "33946032", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8407538156363065, "res": {"Yes": 0.8407538156363065, "No": 0.15924596856241}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6238653931428532, "res": {"Yes": 0.6238653931428532, "No": 0.37613404367023157}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6085455336619892, "res": {"Yes": 0.6085455336619892, "No": 0.3914538697482917}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9914117594574025, "res": {"Yes": 0.9914117594574025, "No": 0.008587999132316634}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9770287622671124, "res": {"Yes": 0.9770287622671124, "No": 0.022970943219962812}, "ground_truth": 1}, {"key": "39035311", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9830974150493604, "res": {"Yes": 0.9830974150493604, "No": 0.01690235698854306}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9978264110730338, "res": {"Yes": 0.9978264110730338, "No": 0.002173580220008604}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9243574467633832, "res": {"Yes": 0.9243574467633832, "No": 0.07564237023104947}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9950748100201168, "res": {"Yes": 0.9950748100201168, "No": 0.0049251842199088585}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981576591971378, "res": {"Yes": 0.9981576591971378, "No": 0.00184232355941084}, "ground_truth": 1}, {"key": "27680038", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977071386315701, "res": {"Yes": 0.9977071386315701, "No": 0.002292805734048603}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983281229461259, "res": {"Yes": 0.9983281229461259, "No": 0.0016717996467473905}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5403251181216255, "res": {"Yes": 0.5403251181216255, "No": 0.4596748579338924}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9921473335517196, "res": {"Yes": 0.9921473335517196, "No": 0.00785265726007012}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998439111831272, "res": {"Yes": 0.9998439111831272, "No": 0.00015607607645555714}, "ground_truth": 1}, {"key": "36901907", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999773720984959, "res": {"Yes": 0.999773720984959, "No": 0.0002261882366125303}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993810411641001, "res": {"Yes": 0.9993810411641001, "No": 0.0006189261485939882}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9918233313053222, "res": {"Yes": 0.9918233313053222, "No": 0.008176517111730295}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9735183634690768, "res": {"Yes": 0.9735183634690768, "No": 0.026481529208898853}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995510778757043, "res": {"Yes": 0.9995510778757043, "No": 0.0004488973120740278}, "ground_truth": 1}, {"key": "21530542", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998224581124266, "res": {"Yes": 0.9998224581124266, "No": 0.00017741303726598835}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995096401657008, "res": {"Yes": 0.9995096401657008, "No": 0.0004902980923969753}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9852247699494435, "res": {"Yes": 0.9852247699494435, "No": 0.014775074107005237}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8757105894259573, "res": {"Yes": 0.8757105894259573, "No": 0.12428892293545685}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957729044256437, "res": {"Yes": 0.9957729044256437, "No": 0.00422692711614752}, "ground_truth": 1}, {"key": "38192532", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9881882242545182, "res": {"Yes": 0.9881882242545182, "No": 0.011811539519342594}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9327135272787346, "res": {"Yes": 0.9327135272787346, "No": 0.06728570311770184}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9710123033187242, "res": {"Yes": 0.9710123033187242, "No": 0.02898738626488225}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9897678809809575, "res": {"Yes": 0.9897678809809575, "No": 0.010232053357006773}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995863359525745, "res": {"Yes": 0.9995863359525745, "No": 0.0004135897572277274}, "ground_truth": 1}, {"key": "34102400", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9872721203890439, "res": {"Yes": 0.9872721203890439, "No": 0.012727783472426774}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9500329412392784, "res": {"Yes": 0.9500329412392784, "No": 0.049966654801668085}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7947463276207107, "res": {"Yes": 0.7947463276207107, "No": 0.20525316716526595}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9889481543784513, "res": {"Yes": 0.9889481543784513, "No": 0.011051755443176324}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9893311818947765, "res": {"Yes": 0.9893311818947765, "No": 0.0106687251570073}, "ground_truth": 1}, {"key": "36133399", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987040592574046, "res": {"Yes": 0.9987040592574046, "No": 0.0012958824335762944}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9599342446715078, "res": {"Yes": 0.9599342446715078, "No": 0.04006563478495668}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6533083081574058, "res": {"Yes": 0.6533083081574058, "No": 0.34669158440728415}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7714750864908039, "res": {"Yes": 0.7714750864908039, "No": 0.2285248092827708}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8560904264872974, "res": {"Yes": 0.8560904264872974, "No": 0.14390944951652873}, "ground_truth": 1}, {"key": "34314544", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.61306559024594, "res": {"Yes": 0.61306559024594, "No": 0.3869345003570852}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5868408618988165, "res": {"Yes": 0.5868408618988165, "No": 0.41315881086265616}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.13867985551844106, "res": {"No": 0.8613197095144783, "Yes": 0.13867985551844106}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.3479168028709127, "res": {"No": 0.6520828660256637, "Yes": 0.3479168028709127}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5385518128621757, "res": {"Yes": 0.5385518128621757, "No": 0.46144800343501524}, "ground_truth": 1}, {"key": "33460074", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9385525940334494, "res": {"Yes": 0.9385525940334494, "No": 0.06144711455825865}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.518217469259059, "res": {"Yes": 0.518217469259059, "No": 0.48178226469092916}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.07268400008688114, "res": {"No": 0.9273159090584336, "Yes": 0.07268400008688114}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9825109895585035, "res": {"Yes": 0.9825109895585035, "No": 0.017488959200223274}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9871029776491839, "res": {"Yes": 0.9871029776491839, "No": 0.012896945223976409}, "ground_truth": 1}, {"key": "36191495", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9851787216198539, "res": {"Yes": 0.9851787216198539, "No": 0.014821180595094342}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9897133423143071, "res": {"Yes": 0.9897133423143071, "No": 0.010286437489643648}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.21730695191190358, "res": {"No": 0.782692649050231, "Yes": 0.21730695191190358}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.1329338955843725, "res": {"No": 0.867065965778162, "Yes": 0.1329338955843725}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.26785971152113924, "res": {"No": 0.7321400925992283, "Yes": 0.26785971152113924}, "ground_truth": 1}, {"key": "39532668", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9245408343610407, "res": {"Yes": 0.9245408343610407, "No": 0.07545906916483427}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.79418080464431, "res": {"Yes": 0.79418080464431, "No": 0.20581876067387134}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.009976652158465275, "res": {"No": 0.9900232324575617, "Yes": 0.009976652158465275}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9088419293778791, "res": {"Yes": 0.9088419293778791, "No": 0.09115774606082508}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975665380806806, "res": {"Yes": 0.9975665380806806, "No": 0.00243341806224846}, "ground_truth": 1}, {"key": "20328247", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.51044386920926, "res": {"Yes": 0.51044386920926, "No": 0.48955571659785263}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9716339209495808, "res": {"Yes": 0.9716339209495808, "No": 0.028365813752567796}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9983321581965542, "res": {"Yes": 0.9983321581965542, "No": 0.0016678181562889628}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9929514530562542, "res": {"Yes": 0.9929514530562542, "No": 0.007048491194690093}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997921898755158, "res": {"Yes": 0.9997921898755158, "No": 0.00020778368403894147}, "ground_truth": 1}, {"key": "39112675", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999174400582596, "res": {"Yes": 0.9999174400582596, "No": 8.249210220950518e-05}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9891156073658017, "res": {"Yes": 0.9891156073658017, "No": 0.010884247701556013}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.20729926606402482, "res": {"No": 0.7927003877406015, "Yes": 0.20729926606402482}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9600824622809233, "res": {"Yes": 0.9600824622809233, "No": 0.039917266672931724}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.958915826604904, "res": {"Yes": 0.958915826604904, "No": 0.041084001137146754}, "ground_truth": 1}, {"key": "31620300", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9746467727345001, "res": {"Yes": 0.9746467727345001, "No": 0.025353015535098153}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9605328619311172, "res": {"Yes": 0.9605328619311172, "No": 0.03946636455839644}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9279664591559795, "res": {"Yes": 0.9279664591559795, "No": 0.07203335524136785}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7771266835597005, "res": {"Yes": 0.7771266835597005, "No": 0.22287315076896155}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9884906348683069, "res": {"Yes": 0.9884906348683069, "No": 0.011509321577413047}, "ground_truth": 1}, {"key": "37518509", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898976371673259, "res": {"Yes": 0.9898976371673259, "No": 0.010102256138813444}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.848727084152074, "res": {"Yes": 0.848727084152074, "No": 0.15127245958064817}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6745872072461035, "res": {"Yes": 0.6745872072461035, "No": 0.32541236029251513}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5492911555906195, "res": {"Yes": 0.5492911555906195, "No": 0.4507087433427974}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8266526948708135, "res": {"Yes": 0.8266526948708135, "No": 0.17334721027508676}, "ground_truth": 1}, {"key": "35454095", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995103143586587, "res": {"Yes": 0.995103143586587, "No": 0.004896902581333815}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9654676535050033, "res": {"Yes": 0.9654676535050033, "No": 0.03453224685664352}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8535308005948555, "res": {"Yes": 0.8535308005948555, "No": 0.14646877692962343}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9529040380442718, "res": {"Yes": 0.9529040380442718, "No": 0.04709572718572708}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9624922902328187, "res": {"Yes": 0.9624922902328187, "No": 0.03750741245771364}, "ground_truth": 1}, {"key": "38542788", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8793336801187148, "res": {"Yes": 0.8793336801187148, "No": 0.12066608213756917}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8224779625120305, "res": {"Yes": 0.8224779625120305, "No": 0.17752152674208163}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.893372456959861, "res": {"Yes": 0.893372456959861, "No": 0.10662732158955668}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976951560412505, "res": {"Yes": 0.9976951560412505, "No": 0.0023047570716420176}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9943026030769387, "res": {"Yes": 0.9943026030769387, "No": 0.005697393439767677}, "ground_truth": 1}, {"key": "23944937", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992566388299146, "res": {"Yes": 0.9992566388299146, "No": 0.0007432690245344449}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979344294330839, "res": {"Yes": 0.9979344294330839, "No": 0.0020655046497089907}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.26591610890488776, "res": {"No": 0.7340834975186808, "Yes": 0.26591610890488776}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6485748251502278, "res": {"Yes": 0.6485748251502278, "No": 0.3514248459930555}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7136317400042346, "res": {"Yes": 0.7136317400042346, "No": 0.28636788029295746}, "ground_truth": 1}, {"key": "31753944", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9624827952932095, "res": {"Yes": 0.9624827952932095, "No": 0.03751706060489631}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8508648042080678, "res": {"Yes": 0.8508648042080678, "No": 0.14913494675507843}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9696617932845886, "res": {"Yes": 0.9696617932845886, "No": 0.030338116098064513}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967862296509482, "res": {"Yes": 0.9967862296509482, "No": 0.0032137589965334852}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99493117208508, "res": {"Yes": 0.99493117208508, "No": 0.005068790753423501}, "ground_truth": 1}, {"key": "35527214", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996818656154786, "res": {"Yes": 0.9996818656154786, "No": 0.0003180175461925955}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9979467757546309, "res": {"Yes": 0.9979467757546309, "No": 0.002053165357219708}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6366414211752983, "res": {"Yes": 0.6366414211752983, "No": 0.363358110759971}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5090330924033888, "res": {"Yes": 0.5090330924033888, "No": 0.49096673645991507}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.90119343967003, "res": {"Yes": 0.90119343967003, "No": 0.09880617264657118}, "ground_truth": 1}, {"key": "40400404", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.49930650166508433, "res": {"No": 0.5006931452867932, "Yes": 0.49930650166508433}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7216189135525178, "res": {"Yes": 0.7216189135525178, "No": 0.27838086080320174}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8599581445903072, "res": {"Yes": 0.8599581445903072, "No": 0.14004146803240453}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.998228336352135, "res": {"Yes": 0.998228336352135, "No": 0.0017716269035717006}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9765668492453451, "res": {"Yes": 0.9765668492453451, "No": 0.02343294126961452}, "ground_truth": 1}, {"key": "21713119", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996785326816946, "res": {"Yes": 0.9996785326816946, "No": 0.0003214058188906752}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.998368284996844, "res": {"Yes": 0.998368284996844, "No": 0.0016315375933433714}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9974163887831973, "res": {"Yes": 0.9974163887831973, "No": 0.0025836261043249457}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997328588885428, "res": {"Yes": 0.9997328588885428, "No": 0.00026709424081138705}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9936888355896446, "res": {"Yes": 0.9936888355896446, "No": 0.006311183882373216}, "ground_truth": 1}, {"key": "28730678", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9858150165207363, "res": {"Yes": 0.9858150165207363, "No": 0.014184950504668305}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9672982286757057, "res": {"Yes": 0.9672982286757057, "No": 0.032701654697145366}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9874789785280256, "res": {"Yes": 0.9874789785280256, "No": 0.01252080648061728}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9437016438842235, "res": {"Yes": 0.9437016438842235, "No": 0.05629811081969152}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9880214410521058, "res": {"Yes": 0.9880214410521058, "No": 0.011978510814377114}, "ground_truth": 1}, {"key": "36823733", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9903173031232313, "res": {"Yes": 0.9903173031232313, "No": 0.009682578879537291}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9632418533243322, "res": {"Yes": 0.9632418533243322, "No": 0.03675769454004161}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7870071779171788, "res": {"Yes": 0.7870071779171788, "No": 0.2129925479574497}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9811425794991734, "res": {"Yes": 0.9811425794991734, "No": 0.01885732210364369}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9614209390607374, "res": {"Yes": 0.9614209390607374, "No": 0.038578791365578145}, "ground_truth": 1}, {"key": "35988862", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9790178800824181, "res": {"Yes": 0.9790178800824181, "No": 0.020981938138290794}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.950059732545977, "res": {"Yes": 0.950059732545977, "No": 0.04994004964690234}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8996732204313447, "res": {"Yes": 0.8996732204313447, "No": 0.10032671184293952}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.3180745785360733, "res": {"No": 0.681925339503524, "Yes": 0.3180745785360733}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7500893546612499, "res": {"Yes": 0.7500893546612499, "No": 0.24991023140063465}, "ground_truth": 1}, {"key": "40499665", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5297536456639621, "res": {"Yes": 0.5297536456639621, "No": 0.4702461020203246}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8586949262174752, "res": {"Yes": 0.8586949262174752, "No": 0.14130500059539414}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7661949405801883, "res": {"Yes": 0.7661949405801883, "No": 0.23380463628691522}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9851713219702652, "res": {"Yes": 0.9851713219702652, "No": 0.014828588589771027}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9943633092914275, "res": {"Yes": 0.9943633092914275, "No": 0.005636733206442511}, "ground_truth": 1}, {"key": "32829820", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995546523966963, "res": {"Yes": 0.9995546523966963, "No": 0.0004452833664126561}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.997492647871668, "res": {"Yes": 0.997492647871668, "No": 0.002507377231111942}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7645675951952775, "res": {"Yes": 0.7645675951952775, "No": 0.2354323424519325}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9384164143012266, "res": {"Yes": 0.9384164143012266, "No": 0.061583456981825005}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974978685623505, "res": {"Yes": 0.9974978685623505, "No": 0.0025021440366232535}, "ground_truth": 1}, {"key": "20583553", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947038277360776, "res": {"Yes": 0.9947038277360776, "No": 0.005296145431996785}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968427224167777, "res": {"Yes": 0.9968427224167777, "No": 0.0031572894135806174}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9673677267134583, "res": {"Yes": 0.9673677267134583, "No": 0.03263209388131827}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9596936356348084, "res": {"Yes": 0.9596936356348084, "No": 0.040306037796438786}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9660840792463079, "res": {"Yes": 0.9660840792463079, "No": 0.033915572645503975}, "ground_truth": 1}, {"key": "30501550", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9846439138025661, "res": {"Yes": 0.9846439138025661, "No": 0.015355933997651948}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9846036761665659, "res": {"Yes": 0.9846036761665659, "No": 0.015396069952818487}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9921225787924295, "res": {"Yes": 0.9921225787924295, "No": 0.007877418738751116}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963762199962233, "res": {"Yes": 0.9963762199962233, "No": 0.003623793042173943}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9924781234774885, "res": {"Yes": 0.9924781234774885, "No": 0.007521763955325175}, "ground_truth": 1}, {"key": "38755897", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999106566949187, "res": {"Yes": 0.999106566949187, "No": 0.0008933704727658791}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9983264568377118, "res": {"Yes": 0.9983264568377118, "No": 0.0016735127810919323}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8890085997548534, "res": {"Yes": 0.8890085997548534, "No": 0.11099140645401859}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9047289797697009, "res": {"Yes": 0.9047289797697009, "No": 0.09527083964330879}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8519508428523482, "res": {"Yes": 0.8519508428523482, "No": 0.1480491153201764}, "ground_truth": 1}, {"key": "35507201", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9111691353077039, "res": {"Yes": 0.9111691353077039, "No": 0.08883078598913016}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9289178671812415, "res": {"Yes": 0.9289178671812415, "No": 0.0710819672156713}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9827194405668299, "res": {"Yes": 0.9827194405668299, "No": 0.017280567062979816}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8242599710375318, "res": {"Yes": 0.8242599710375318, "No": 0.17573960439466893}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9858992451963743, "res": {"Yes": 0.9858992451963743, "No": 0.014100645535308706}, "ground_truth": 1}, {"key": "36453511", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9951992376743274, "res": {"Yes": 0.9951992376743274, "No": 0.004800784197179703}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9880762471334248, "res": {"Yes": 0.9880762471334248, "No": 0.011923691210153459}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.04837362750617779, "res": {"No": 0.9516262935978047, "Yes": 0.04837362750617779}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9654882085301539, "res": {"Yes": 0.9654882085301539, "No": 0.0345116476113402}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9975919345526354, "res": {"Yes": 0.9975919345526354, "No": 0.002408058398405757}, "ground_truth": 1}, {"key": "38066835", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997652325907557, "res": {"Yes": 0.997652325907557, "No": 0.002347693317264585}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.994062941700511, "res": {"Yes": 0.994062941700511, "No": 0.0059370375438876025}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9147423345028786, "res": {"Yes": 0.9147423345028786, "No": 0.08525747358285092}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9863446113797645, "res": {"Yes": 0.9863446113797645, "No": 0.01365526660359366}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875058363465213, "res": {"Yes": 0.9875058363465213, "No": 0.012494046851472868}, "ground_truth": 1}, {"key": "39697181", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9898596703715653, "res": {"Yes": 0.9898596703715653, "No": 0.010140286771560723}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9867560225869907, "res": {"Yes": 0.9867560225869907, "No": 0.01324394251185606}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9012738026719765, "res": {"Yes": 0.9012738026719765, "No": 0.09872606345261305}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9947749612517257, "res": {"Yes": 0.9947749612517257, "No": 0.005225080889536974}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9939941506508204, "res": {"Yes": 0.9939941506508204, "No": 0.0060058733313182785}, "ground_truth": 1}, {"key": "21820893", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9780338148830492, "res": {"Yes": 0.9780338148830492, "No": 0.02196610929465258}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9686310567322833, "res": {"Yes": 0.9686310567322833, "No": 0.03136883170193022}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.01930295472942979, "res": {"No": 0.9806968446850699, "Yes": 0.01930295472942979}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980043626374671, "res": {"Yes": 0.9980043626374671, "No": 0.001995279775753247}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8435433460449242, "res": {"Yes": 0.8435433460449242, "No": 0.15645646290576737}, "ground_truth": 1}, {"key": "40519933", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.10225917060627829, "res": {"No": 0.8977407459293256, "Yes": 0.10225917060627829}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.4140419424593824, "res": {"No": 0.5859566899919568, "Yes": 0.4140419424593824}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7009264773311226, "res": {"Yes": 0.7009264773311226, "No": 0.2990730562955111}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.972563162512143, "res": {"Yes": 0.972563162512143, "No": 0.02743657230296003}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9872144959844047, "res": {"Yes": 0.9872144959844047, "No": 0.012785438152513207}, "ground_truth": 1}, {"key": "30446033", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9891761361691139, "res": {"Yes": 0.9891761361691139, "No": 0.010823798546816867}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7522682524182315, "res": {"Yes": 0.7522682524182315, "No": 0.24773132178414647}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.25652983637140747, "res": {"No": 0.7434697498918089, "Yes": 0.25652983637140747}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9794526047064925, "res": {"Yes": 0.9794526047064925, "No": 0.020547432192162297}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9655566688986422, "res": {"Yes": 0.9655566688986422, "No": 0.0344432064933736}, "ground_truth": 1}, {"key": "40216291", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9457922811423973, "res": {"Yes": 0.9457922811423973, "No": 0.054207521884976205}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9662031506498377, "res": {"Yes": 0.9662031506498377, "No": 0.033796663310176514}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.973177402629175, "res": {"Yes": 0.973177402629175, "No": 0.026822236911548357}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956560068516859, "res": {"Yes": 0.9956560068516859, "No": 0.00434389008233725}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9923335934581402, "res": {"Yes": 0.9923335934581402, "No": 0.007666190977953066}, "ground_truth": 1}, {"key": "33479118", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996150415555235, "res": {"Yes": 0.9996150415555235, "No": 0.0003849549282539171}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.999738221619941, "res": {"Yes": 0.999738221619941, "No": 0.0002615460998340981}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5269274531361502, "res": {"Yes": 0.5269274531361502, "No": 0.4730722281159994}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970574208977148, "res": {"Yes": 0.9970574208977148, "No": 0.002942586271921501}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994444991477612, "res": {"Yes": 0.9994444991477612, "No": 0.0005554892505593965}, "ground_truth": 1}, {"key": "22297373", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9966769187970247, "res": {"Yes": 0.9966769187970247, "No": 0.0033230587830928894}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9939337255951126, "res": {"Yes": 0.9939337255951126, "No": 0.006066084134279183}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9583031582097528, "res": {"Yes": 0.9583031582097528, "No": 0.04169652028758951}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9707435274201057, "res": {"Yes": 0.9707435274201057, "No": 0.02925616377949766}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972608230101602, "res": {"Yes": 0.9972608230101602, "No": 0.002739001886353225}, "ground_truth": 1}, {"key": "36463668", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986946616961625, "res": {"Yes": 0.9986946616961625, "No": 0.0013052125211635744}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9921212880417941, "res": {"Yes": 0.9921212880417941, "No": 0.007878584683002192}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7393449925196558, "res": {"Yes": 0.7393449925196558, "No": 0.26065468334080943}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.993693889106437, "res": {"Yes": 0.993693889106437, "No": 0.006306111729810348}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9968904684389098, "res": {"Yes": 0.9968904684389098, "No": 0.0031095694589842802}, "ground_truth": 1}, {"key": "35264615", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976868454747524, "res": {"Yes": 0.9976868454747524, "No": 0.0023131043087819695}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6946609320620855, "res": {"Yes": 0.6946609320620855, "No": 0.3053386165544152}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5542581644318411, "res": {"Yes": 0.5542581644318411, "No": 0.4457413624466725}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9260000410550977, "res": {"Yes": 0.9260000410550977, "No": 0.07399968814822667}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9537265345894029, "res": {"Yes": 0.9537265345894029, "No": 0.046273196259836856}, "ground_truth": 1}, {"key": "39898482", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9375479929198686, "res": {"Yes": 0.9375479929198686, "No": 0.062451751273093714}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9722785349317541, "res": {"Yes": 0.9722785349317541, "No": 0.02772139448026138}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9858029610626126, "res": {"Yes": 0.9858029610626126, "No": 0.01419695179133889}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6705146883084832, "res": {"Yes": 0.6705146883084832, "No": 0.32948521403230235}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9874528343806878, "res": {"Yes": 0.9874528343806878, "No": 0.012547103962420699}, "ground_truth": 1}, {"key": "37228721", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9361122261116545, "res": {"Yes": 0.9361122261116545, "No": 0.06388769516943854}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9337858497178353, "res": {"Yes": 0.9337858497178353, "No": 0.06621403694977002}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.015552288200888921, "res": {"No": 0.9844474585415448, "Yes": 0.015552288200888921}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9805636388151261, "res": {"Yes": 0.9805636388151261, "No": 0.01943612949158567}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9942257650482892, "res": {"Yes": 0.9942257650482892, "No": 0.005774006915762948}, "ground_truth": 1}, {"key": "24535799", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9922077730395374, "res": {"Yes": 0.9922077730395374, "No": 0.00779199308570878}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992461640771738, "res": {"Yes": 0.9992461640771738, "No": 0.0007537370430907355}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6129885266145262, "res": {"Yes": 0.6129885266145262, "No": 0.38701131060884236}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9932878319119604, "res": {"Yes": 0.9932878319119604, "No": 0.0067121517588088755}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9440580623513242, "res": {"Yes": 0.9440580623513242, "No": 0.05594187912375445}, "ground_truth": 1}, {"key": "35177759", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99143789143295, "res": {"Yes": 0.99143789143295, "No": 0.00856199547292403}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9654230982009157, "res": {"Yes": 0.9654230982009157, "No": 0.034576784165520076}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9841917470749674, "res": {"Yes": 0.9841917470749674, "No": 0.015808104933482756}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9188844491047395, "res": {"Yes": 0.9188844491047395, "No": 0.08111535513196293}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9817027922372413, "res": {"Yes": 0.9817027922372413, "No": 0.01829721164261769}, "ground_truth": 1}, {"key": "34364829", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9981992311333804, "res": {"Yes": 0.9981992311333804, "No": 0.0018007167480083024}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9942511001326032, "res": {"Yes": 0.9942511001326032, "No": 0.005748903143624214}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9937920818444949, "res": {"Yes": 0.9937920818444949, "No": 0.0062077982190757225}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9974539806191515, "res": {"Yes": 0.9974539806191515, "No": 0.0025459671029567973}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998408123716953, "res": {"Yes": 0.9998408123716953, "No": 0.00015916661680160396}, "ground_truth": 1}, {"key": "38090732", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994942778505762, "res": {"Yes": 0.9994942778505762, "No": 0.0005056436933784076}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994786774858282, "res": {"Yes": 0.9994786774858282, "No": 0.000521304413443641}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.07075496275300723, "res": {"No": 0.9292447774482635, "Yes": 0.07075496275300723}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949962014191774, "res": {"Yes": 0.9949962014191774, "No": 0.005003780261057674}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9780370120808158, "res": {"Yes": 0.9780370120808158, "No": 0.021962913458252947}, "ground_truth": 1}, {"key": "30651479", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9964971846217122, "res": {"Yes": 0.9964971846217122, "No": 0.0035028106146076862}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9670009559078634, "res": {"Yes": 0.9670009559078634, "No": 0.03299895449457886}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.17938167152305706, "res": {"No": 0.8206180853813975, "Yes": 0.17938167152305706}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9890944959075804, "res": {"Yes": 0.9890944959075804, "No": 0.010905168803384262}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8997769551349508, "res": {"Yes": 0.8997769551349508, "No": 0.10022235614483069}, "ground_truth": 1}, {"key": "39380921", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9694800249180789, "res": {"Yes": 0.9694800249180789, "No": 0.030519880134946057}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.973211391443488, "res": {"Yes": 0.973211391443488, "No": 0.026788400562199707}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9817566813459397, "res": {"Yes": 0.9817566813459397, "No": 0.01824329433814821}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.998520278668796, "res": {"Yes": 0.998520278668796, "No": 0.001479666876480971}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999148177982098, "res": {"Yes": 0.9999148177982098, "No": 8.512157749003165e-05}, "ground_truth": 1}, {"key": "39037490", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994784392601138, "res": {"Yes": 0.9994784392601138, "No": 0.0005214954543772884}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981347394646659, "res": {"Yes": 0.9981347394646659, "No": 0.001865100201469741}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9152136716860796, "res": {"Yes": 0.9152136716860796, "No": 0.08478621570887077}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9744396900949495, "res": {"Yes": 0.9744396900949495, "No": 0.025560169961089913}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9310208990479871, "res": {"Yes": 0.9310208990479871, "No": 0.06897884607588979}, "ground_truth": 1}, {"key": "35917499", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9989912784020214, "res": {"Yes": 0.9989912784020214, "No": 0.0010086545146104138}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9901347128745687, "res": {"Yes": 0.9901347128745687, "No": 0.009865178947865068}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9614614907282932, "res": {"Yes": 0.9614614907282932, "No": 0.03853835113975531}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9483296548406426, "res": {"Yes": 0.9483296548406426, "No": 0.05167021890987661}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977216191587508, "res": {"Yes": 0.9977216191587508, "No": 0.002278303937268667}, "ground_truth": 1}, {"key": "34908073", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9929211264982133, "res": {"Yes": 0.9929211264982133, "No": 0.0070788585926539775}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9783402104421464, "res": {"Yes": 0.9783402104421464, "No": 0.021659747389758323}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6631899029636726, "res": {"Yes": 0.6631899029636726, "No": 0.33680959779633524}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9875511846432439, "res": {"Yes": 0.9875511846432439, "No": 0.012448613845260572}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999360558389981, "res": {"Yes": 0.999360558389981, "No": 0.0006393837730897672}, "ground_truth": 1}, {"key": "36344759", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957987908745377, "res": {"Yes": 0.9957987908745377, "No": 0.004201060439360104}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9796606801685834, "res": {"Yes": 0.9796606801685834, "No": 0.020339265502539484}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8976223004432391, "res": {"Yes": 0.8976223004432391, "No": 0.1023775836509129}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.39397591539173343, "res": {"No": 0.6060239331777628, "Yes": 0.39397591539173343}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8697581539010582, "res": {"Yes": 0.8697581539010582, "No": 0.13024174219669502}, "ground_truth": 1}, {"key": "39984637", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.92054079038419, "res": {"Yes": 0.92054079038419, "No": 0.07945913616290864}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6891836967039409, "res": {"Yes": 0.6891836967039409, "No": 0.3108160254130063}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9279440907940774, "res": {"Yes": 0.9279440907940774, "No": 0.07205525671021581}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9815937792135753, "res": {"Yes": 0.9815937792135753, "No": 0.01840605670097541}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9847292112913069, "res": {"Yes": 0.9847292112913069, "No": 0.015270575075235401}, "ground_truth": 1}, {"key": "17917326", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9704713108651173, "res": {"Yes": 0.9704713108651173, "No": 0.02952844331470489}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9271349100229389, "res": {"Yes": 0.9271349100229389, "No": 0.0728646695577176}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4616072073809221, "res": {"No": 0.5383924628035898, "Yes": 0.4616072073809221}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949408458478951, "res": {"Yes": 0.9949408458478951, "No": 0.0050591019078647555}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945004058437084, "res": {"Yes": 0.9945004058437084, "No": 0.005499569843912588}, "ground_truth": 1}, {"key": "32193638", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958885213671118, "res": {"Yes": 0.9958885213671118, "No": 0.004111446359884399}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.951049514568309, "res": {"Yes": 0.951049514568309, "No": 0.04895038719630634}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6622521926007694, "res": {"Yes": 0.6622521926007694, "No": 0.3377476027069105}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9184501261859128, "res": {"Yes": 0.9184501261859128, "No": 0.08154973495600826}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9539337923448072, "res": {"Yes": 0.9539337923448072, "No": 0.046066065791573754}, "ground_truth": 1}, {"key": "34564692", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6330469430189807, "res": {"Yes": 0.6330469430189807, "No": 0.36695296496469987}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.2979540497010983, "res": {"No": 0.7020458385558378, "Yes": 0.2979540497010983}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6736092257251658, "res": {"Yes": 0.6736092257251658, "No": 0.32639038958469496}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986063301610697, "res": {"Yes": 0.9986063301610697, "No": 0.0013936367244443842}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9977680200797747, "res": {"Yes": 0.9977680200797747, "No": 0.0022317697463988995}, "ground_truth": 1}, {"key": "39329284", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991410763862468, "res": {"Yes": 0.9991410763862468, "No": 0.000858916969619926}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.37747312684218676, "res": {"No": 0.6225260525705929, "Yes": 0.37747312684218676}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.05240409719330224, "res": {"No": 0.947595843548873, "Yes": 0.05240409719330224}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9873871640388245, "res": {"Yes": 0.9873871640388245, "No": 0.012612782438689001}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8936984410264243, "res": {"Yes": 0.8936984410264243, "No": 0.10630145564328566}, "ground_truth": 1}, {"key": "37438541", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9803843993406717, "res": {"Yes": 0.9803843993406717, "No": 0.01961564729235403}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7760923408016122, "res": {"Yes": 0.7760923408016122, "No": 0.22390755795656625}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.06327378339485745, "res": {"No": 0.9367259457944294, "Yes": 0.06327378339485745}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.23072397415982188, "res": {"No": 0.7692757893692932, "Yes": 0.23072397415982188}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.60020648578176, "res": {"Yes": 0.60020648578176, "No": 0.39979321421853975}, "ground_truth": 1}, {"key": "34652757", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9682600936592405, "res": {"Yes": 0.9682600936592405, "No": 0.03173976397855171}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9894908322651216, "res": {"Yes": 0.9894908322651216, "No": 0.010509023604277514}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9804678237984548, "res": {"Yes": 0.9804678237984548, "No": 0.019532111973033103}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9898988022775306, "res": {"Yes": 0.9898988022775306, "No": 0.010101114648365175}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9961716371382587, "res": {"Yes": 0.9961716371382587, "No": 0.003828351447170517}, "ground_truth": 1}, {"key": "31361004", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9881139646998032, "res": {"Yes": 0.9881139646998032, "No": 0.011885952955752822}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976906439751102, "res": {"Yes": 0.9976906439751102, "No": 0.002309286907863561}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.705034035521303, "res": {"Yes": 0.705034035521303, "No": 0.2949655526908556}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9610060322273147, "res": {"Yes": 0.9610060322273147, "No": 0.03899363310101399}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956427723813045, "res": {"Yes": 0.9956427723813045, "No": 0.004357259387977493}, "ground_truth": 1}, {"key": "26150727", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984892570228207, "res": {"Yes": 0.9984892570228207, "No": 0.0015106806018521707}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9960664930853933, "res": {"Yes": 0.9960664930853933, "No": 0.003933417649872978}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6858618612320522, "res": {"Yes": 0.6858618612320522, "No": 0.3141374342931538}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.645829397910013, "res": {"Yes": 0.645829397910013, "No": 0.3541702674449391}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7537666361551713, "res": {"Yes": 0.7537666361551713, "No": 0.2462329352028086}, "ground_truth": 1}, {"key": "36997402", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9869186721313769, "res": {"Yes": 0.9869186721313769, "No": 0.01308121561951102}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8817249232684186, "res": {"Yes": 0.8817249232684186, "No": 0.11827436811335196}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.033490699686283994, "res": {"No": 0.9665090564700018, "Yes": 0.033490699686283994}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9946156138521475, "res": {"Yes": 0.9946156138521475, "No": 0.0053843461303938606}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998716780239691, "res": {"Yes": 0.9998716780239691, "No": 0.0001282296193489443}, "ground_truth": 1}, {"key": "37430643", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9986494866650274, "res": {"Yes": 0.9986494866650274, "No": 0.0013504672976759804}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9153969249592446, "res": {"Yes": 0.9153969249592446, "No": 0.08460286903514619}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7152612631675646, "res": {"Yes": 0.7152612631675646, "No": 0.28473823342058613}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8578882094493427, "res": {"Yes": 0.8578882094493427, "No": 0.14211133205704266}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9602362058736452, "res": {"Yes": 0.9602362058736452, "No": 0.039763614236683544}, "ground_truth": 1}, {"key": "36964631", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9821279448926266, "res": {"Yes": 0.9821279448926266, "No": 0.017872095376957876}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9074170926299973, "res": {"Yes": 0.9074170926299973, "No": 0.09258256851765849}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9713564745524264, "res": {"Yes": 0.9713564745524264, "No": 0.028643429469833715}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9019384991541449, "res": {"Yes": 0.9019384991541449, "No": 0.09806142607332193}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909601503645088, "res": {"Yes": 0.9909601503645088, "No": 0.009039805683474057}, "ground_truth": 1}, {"key": "35502013", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982566057803197, "res": {"Yes": 0.9982566057803197, "No": 0.001743372191922723}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9616323249207693, "res": {"Yes": 0.9616323249207693, "No": 0.038367547265528795}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8615162678702853, "res": {"Yes": 0.8615162678702853, "No": 0.138483623807866}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9373554347138173, "res": {"Yes": 0.9373554347138173, "No": 0.06264433654814831}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9713884220208752, "res": {"Yes": 0.9713884220208752, "No": 0.028611283814437977}, "ground_truth": 1}, {"key": "33987664", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984454042378197, "res": {"Yes": 0.9984454042378197, "No": 0.0015545466799504264}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9960133907099635, "res": {"Yes": 0.9960133907099635, "No": 0.00398663604570694}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.752569520936432, "res": {"Yes": 0.752569520936432, "No": 0.24742996666120537}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956752756701409, "res": {"Yes": 0.9956752756701409, "No": 0.004324680561234865}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9909697547968291, "res": {"Yes": 0.9909697547968291, "No": 0.009030137361370517}, "ground_truth": 1}, {"key": "35203721", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9894369025265064, "res": {"Yes": 0.9894369025265064, "No": 0.010562950883654757}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986883631485319, "res": {"Yes": 0.9986883631485319, "No": 0.0013116474978226568}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7465014123814949, "res": {"Yes": 0.7465014123814949, "No": 0.25349839178865424}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9855114677284378, "res": {"Yes": 0.9855114677284378, "No": 0.014488428248443189}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991419100699088, "res": {"Yes": 0.9991419100699088, "No": 0.0008579812601773297}, "ground_truth": 1}, {"key": "39028348", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992350975871115, "res": {"Yes": 0.9992350975871115, "No": 0.0007648861184426877}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9783152268871962, "res": {"Yes": 0.9783152268871962, "No": 0.021684739818312773}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9446771122812592, "res": {"Yes": 0.9446771122812592, "No": 0.05532258749781726}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9846628634570297, "res": {"Yes": 0.9846628634570297, "No": 0.015337139096691819}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9951300543257476, "res": {"Yes": 0.9951300543257476, "No": 0.004869949556254053}, "ground_truth": 1}, {"key": "37459383", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9889521161126931, "res": {"Yes": 0.9889521161126931, "No": 0.011047792994183965}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9894720340968546, "res": {"Yes": 0.9894720340968546, "No": 0.010527922139402436}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9940685966424522, "res": {"Yes": 0.9940685966424522, "No": 0.005931231587091881}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9954099133949224, "res": {"Yes": 0.9954099133949224, "No": 0.004590010350047114}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9836507419279934, "res": {"Yes": 0.9836507419279934, "No": 0.016349080855632237}, "ground_truth": 1}, {"key": "34020070", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.98966522955937, "res": {"Yes": 0.98966522955937, "No": 0.010334546157208399}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.995910277287439, "res": {"Yes": 0.995910277287439, "No": 0.004089512128613103}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.2717805336377779, "res": {"No": 0.7282192125293967, "Yes": 0.2717805336377779}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9549151066623021, "res": {"Yes": 0.9549151066623021, "No": 0.04508461146050881}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9922677369437666, "res": {"Yes": 0.9922677369437666, "No": 0.007732203490639642}, "ground_truth": 1}, {"key": "35176615", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6802066499459366, "res": {"Yes": 0.6802066499459366, "No": 0.31979305356605764}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9715266827738717, "res": {"Yes": 0.9715266827738717, "No": 0.028473040123202453}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8032026586322809, "res": {"Yes": 0.8032026586322809, "No": 0.1967972437101339}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6035456133149565, "res": {"Yes": 0.6035456133149565, "No": 0.396454087128673}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986706434782372, "res": {"Yes": 0.9986706434782372, "No": 0.001329279251238828}, "ground_truth": 1}, {"key": "33296389", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9795171145588095, "res": {"Yes": 0.9795171145588095, "No": 0.02048288744957562}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9844010163593032, "res": {"Yes": 0.9844010163593032, "No": 0.015598966763954392}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9979193586413376, "res": {"Yes": 0.9979193586413376, "No": 0.0020806248952092918}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9361273772106441, "res": {"Yes": 0.9361273772106441, "No": 0.06387240867308673}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9879982860529447, "res": {"Yes": 0.9879982860529447, "No": 0.01200160365747498}, "ground_truth": 1}, {"key": "35399504", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9704068630349376, "res": {"Yes": 0.9704068630349376, "No": 0.029592900334079127}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980979216481499, "res": {"Yes": 0.9980979216481499, "No": 0.0019020184777514548}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9711330309568379, "res": {"Yes": 0.9711330309568379, "No": 0.028866728790163127}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9056160397065263, "res": {"Yes": 0.9056160397065263, "No": 0.09438381233850082}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979973582682281, "res": {"Yes": 0.9979973582682281, "No": 0.002002593877441498}, "ground_truth": 1}, {"key": "34807886", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9911960993058082, "res": {"Yes": 0.9911960993058082, "No": 0.008803772114208375}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989662967396862, "res": {"Yes": 0.9989662967396862, "No": 0.001033715175230361}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6661487933209395, "res": {"Yes": 0.6661487933209395, "No": 0.33385083084627065}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9782100343066295, "res": {"Yes": 0.9782100343066295, "No": 0.021789784934319358}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948310004932295, "res": {"Yes": 0.9948310004932295, "No": 0.005168991898985355}, "ground_truth": 1}, {"key": "37629813", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978723449736593, "res": {"Yes": 0.9978723449736593, "No": 0.002127453402977173}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.968631391878687, "res": {"Yes": 0.968631391878687, "No": 0.03136824824708671}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.12869443569600214, "res": {"No": 0.8713055151241824, "Yes": 0.12869443569600214}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995988435847601, "res": {"Yes": 0.9995988435847601, "No": 0.000401067426061775}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999345202033818, "res": {"Yes": 0.999345202033818, "No": 0.0006547755845787826}, "ground_truth": 1}, {"key": "28084389", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975410485861548, "res": {"Yes": 0.9975410485861548, "No": 0.002458971022695271}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972220498642198, "res": {"Yes": 0.9972220498642198, "No": 0.002777963982491149}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9480018217990558, "res": {"Yes": 0.9480018217990558, "No": 0.051997933581896486}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9494597085658367, "res": {"Yes": 0.9494597085658367, "No": 0.05053997862270088}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9777256986645555, "res": {"Yes": 0.9777256986645555, "No": 0.022274133644651035}, "ground_truth": 1}, {"key": "35391734", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9496612404315065, "res": {"Yes": 0.9496612404315065, "No": 0.050338262760078724}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9789215749145228, "res": {"Yes": 0.9789215749145228, "No": 0.021078226304537775}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7340052777488086, "res": {"Yes": 0.7340052777488086, "No": 0.26599429973439453}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9192555088986015, "res": {"Yes": 0.9192555088986015, "No": 0.08074392111683072}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962421504553446, "res": {"Yes": 0.9962421504553446, "No": 0.0037578360720651248}, "ground_truth": 1}, {"key": "40214591", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9676444576415867, "res": {"Yes": 0.9676444576415867, "No": 0.03235531418387636}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967253480067366, "res": {"Yes": 0.9967253480067366, "No": 0.0032746827002777187}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.2972203785550034, "res": {"No": 0.7027793117484151, "Yes": 0.2972203785550034}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7179363377150606, "res": {"Yes": 0.7179363377150606, "No": 0.2820631491379508}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6509005587352702, "res": {"Yes": 0.6509005587352702, "No": 0.3490990800777294}, "ground_truth": 1}, {"key": "26283171", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9278347900575687, "res": {"Yes": 0.9278347900575687, "No": 0.07216505160972822}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.4552438816515688, "res": {"No": 0.5447558316657123, "Yes": 0.4552438816515688}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.990531293205836, "res": {"Yes": 0.990531293205836, "No": 0.009468522528062554}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9755042033595123, "res": {"Yes": 0.9755042033595123, "No": 0.024495726627801973}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987725496342079, "res": {"Yes": 0.9987725496342079, "No": 0.001227403888420746}, "ground_truth": 1}, {"key": "37084030", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967106700383206, "res": {"Yes": 0.9967106700383206, "No": 0.00328930823247097}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984320888590081, "res": {"Yes": 0.9984320888590081, "No": 0.0015678719890862133}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9801795743216783, "res": {"Yes": 0.9801795743216783, "No": 0.019820403342695466}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9888351873496612, "res": {"Yes": 0.9888351873496612, "No": 0.011164766583488977}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9920237792166872, "res": {"Yes": 0.9920237792166872, "No": 0.007976145533996785}, "ground_truth": 1}, {"key": "39027295", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992987697769106, "res": {"Yes": 0.9992987697769106, "No": 0.000701162420134994}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9578781511184944, "res": {"Yes": 0.9578781511184944, "No": 0.042121722394308767}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9815374991845359, "res": {"Yes": 0.9815374991845359, "No": 0.018462382415013086}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995569162406144, "res": {"Yes": 0.9995569162406144, "No": 0.0004429684541714136}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987218910778332, "res": {"Yes": 0.9987218910778332, "No": 0.0012780995895083505}, "ground_truth": 1}, {"key": "14018647", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.989850671598206, "res": {"Yes": 0.989850671598206, "No": 0.010149214151390704}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993757994942419, "res": {"Yes": 0.9993757994942419, "No": 0.0006241960851095218}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8993870259297677, "res": {"Yes": 0.8993870259297677, "No": 0.100612457195657}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.271497512965003, "res": {"No": 0.7285022243665962, "Yes": 0.271497512965003}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9623923656206538, "res": {"Yes": 0.9623923656206538, "No": 0.03760752934861469}, "ground_truth": 1}, {"key": "37424289", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9307171099510649, "res": {"Yes": 0.9307171099510649, "No": 0.06928275841926537}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9457258548085263, "res": {"Yes": 0.9457258548085263, "No": 0.054274053626895975}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9078887862372109, "res": {"Yes": 0.9078887862372109, "No": 0.0921106138214266}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9820259581654558, "res": {"Yes": 0.9820259581654558, "No": 0.01797388334455237}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9685083738180318, "res": {"Yes": 0.9685083738180318, "No": 0.031491582219189296}, "ground_truth": 1}, {"key": "37498031", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9937353338623607, "res": {"Yes": 0.9937353338623607, "No": 0.0062644887381848305}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9958080130098431, "res": {"Yes": 0.9958080130098431, "No": 0.004191893658539398}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.0001019113744820423, "res": {"No": 0.9998980116571646, "Yes": 0.0001019113744820423}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9632786883970855, "res": {"Yes": 0.9632786883970855, "No": 0.0367211278071192}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956363923228607, "res": {"Yes": 0.9956363923228607, "No": 0.004363635669262383}, "ground_truth": 1}, {"key": "30104095", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.892817452049195, "res": {"Yes": 0.892817452049195, "No": 0.10718240245756144}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9807484405032848, "res": {"Yes": 0.9807484405032848, "No": 0.019251538487756424}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9968235385625487, "res": {"Yes": 0.9968235385625487, "No": 0.0031764851253038686}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8472094333660722, "res": {"Yes": 0.8472094333660722, "No": 0.15279006866651743}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9462019423472452, "res": {"Yes": 0.9462019423472452, "No": 0.053797891716924784}, "ground_truth": 1}, {"key": "37911407", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9883420279557835, "res": {"Yes": 0.9883420279557835, "No": 0.011657902532052095}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9719330368738117, "res": {"Yes": 0.9719330368738117, "No": 0.02806687566312664}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5543694152090846, "res": {"Yes": 0.5543694152090846, "No": 0.4456302084123625}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6620949131392311, "res": {"Yes": 0.6620949131392311, "No": 0.3379045229102637}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8670196396812232, "res": {"Yes": 0.8670196396812232, "No": 0.13298001835228224}, "ground_truth": 1}, {"key": "39177472", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5989179670968519, "res": {"Yes": 0.5989179670968519, "No": 0.4010818892221669}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5046555306396725, "res": {"Yes": 0.5046555306396725, "No": 0.49534412793948557}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7830028524674102, "res": {"Yes": 0.7830028524674102, "No": 0.2169970326411327}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9879136933093359, "res": {"Yes": 0.9879136933093359, "No": 0.012086260106121788}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9875322238425229, "res": {"Yes": 0.9875322238425229, "No": 0.012467717192524081}, "ground_truth": 1}, {"key": "32325454", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9838540117296373, "res": {"Yes": 0.9838540117296373, "No": 0.016145923113081436}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9794466604266725, "res": {"Yes": 0.9794466604266725, "No": 0.020553346594188834}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4140035625502527, "res": {"No": 0.5859962434034439, "Yes": 0.4140035625502527}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997534721429874, "res": {"Yes": 0.9997534721429874, "No": 0.0002464639062874624}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980297740482581, "res": {"Yes": 0.9980297740482581, "No": 0.001970174227081393}, "ground_truth": 1}, {"key": "38395319", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958096675462312, "res": {"Yes": 0.9958096675462312, "No": 0.0041902704524795905}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986050243842861, "res": {"Yes": 0.9986050243842861, "No": 0.001394921692071766}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9899103693126154, "res": {"Yes": 0.9899103693126154, "No": 0.010089409193479306}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9982296416363611, "res": {"Yes": 0.9982296416363611, "No": 0.0017702785002496084}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9582071554169477, "res": {"Yes": 0.9582071554169477, "No": 0.04179250340138673}, "ground_truth": 1}, {"key": "38235895", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998971767194002, "res": {"Yes": 0.998971767194002, "No": 0.0010281680012100688}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9812920159496761, "res": {"Yes": 0.9812920159496761, "No": 0.018707783720279043}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9905701218030144, "res": {"Yes": 0.9905701218030144, "No": 0.009429638671358428}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9979442848825871, "res": {"Yes": 0.9979442848825871, "No": 0.002055588700081707}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9954226660711765, "res": {"Yes": 0.9954226660711765, "No": 0.004577341078438727}, "ground_truth": 1}, {"key": "26543267", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983872938104403, "res": {"Yes": 0.9983872938104403, "No": 0.0016126153128600558}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971042264639798, "res": {"Yes": 0.9971042264639798, "No": 0.0028956536720223533}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8380413215417886, "res": {"Yes": 0.8380413215417886, "No": 0.161958038322912}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9587994423830873, "res": {"Yes": 0.9587994423830873, "No": 0.04119985261248188}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9944400391144939, "res": {"Yes": 0.9944400391144939, "No": 0.005559792453075044}, "ground_truth": 1}, {"key": "39054728", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9904432309643965, "res": {"Yes": 0.9904432309643965, "No": 0.009556692352679499}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9763573580899657, "res": {"Yes": 0.9763573580899657, "No": 0.023641950059967477}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9923691558581743, "res": {"Yes": 0.9923691558581743, "No": 0.0076306099183986245}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9782691248684166, "res": {"Yes": 0.9782691248684166, "No": 0.02173083411583939}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997883761554854, "res": {"Yes": 0.9997883761554854, "No": 0.00021152719207744679}, "ground_truth": 1}, {"key": "39158443", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9758834789674489, "res": {"Yes": 0.9758834789674489, "No": 0.024116404147875064}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9887660741408029, "res": {"Yes": 0.9887660741408029, "No": 0.011233875133344834}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9971116890193571, "res": {"Yes": 0.9971116890193571, "No": 0.002888251434308617}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967582761698894, "res": {"Yes": 0.9967582761698894, "No": 0.003241743424231164}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9935587784805671, "res": {"Yes": 0.9935587784805671, "No": 0.006441152469403944}, "ground_truth": 1}, {"key": "36254201", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994835587017387, "res": {"Yes": 0.9994835587017387, "No": 0.0005163612192956117}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9859658715608465, "res": {"Yes": 0.9859658715608465, "No": 0.0140340887253622}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992719864805292, "res": {"Yes": 0.9992719864805292, "No": 0.0007280095439733638}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996883598092854, "res": {"Yes": 0.996883598092854, "No": 0.0031164108747203673}, "ground_truth": 1}, {"key": "23434347", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999656371833687, "res": {"Yes": 0.999656371833687, "No": 0.000343599885874713}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9976748668195532, "res": {"Yes": 0.9976748668195532, "No": 0.002325126111287154}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.38023010502017857, "res": {"No": 0.619769570815925, "Yes": 0.38023010502017857}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9729838199191677, "res": {"Yes": 0.9729838199191677, "No": 0.027015986381207314}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3009836604277188, "res": {"No": 0.6990155925909898, "Yes": 0.3009836604277188}, "ground_truth": 1}, {"key": "34397620", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8396971901943637, "res": {"Yes": 0.8396971901943637, "No": 0.16030242365279276}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7820275569431269, "res": {"Yes": 0.7820275569431269, "No": 0.2179720577209966}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9801041034001234, "res": {"Yes": 0.9801041034001234, "No": 0.019895762691497898}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.998006857152489, "res": {"Yes": 0.998006857152489, "No": 0.001993177858774439}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999722971309536, "res": {"Yes": 0.999722971309536, "No": 0.0002769861646877389}, "ground_truth": 1}, {"key": "34340916", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997403667405498, "res": {"Yes": 0.9997403667405498, "No": 0.0002595440812090956}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993299614361606, "res": {"Yes": 0.9993299614361606, "No": 0.0006700065965705793}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9817174972734993, "res": {"Yes": 0.9817174972734993, "No": 0.018282481001186304}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9678924337879177, "res": {"Yes": 0.9678924337879177, "No": 0.03210746247525024}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9889404529746864, "res": {"Yes": 0.9889404529746864, "No": 0.011059383785907699}, "ground_truth": 1}, {"key": "30375089", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9953422355858165, "res": {"Yes": 0.9953422355858165, "No": 0.004657690754094017}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9664960280157325, "res": {"Yes": 0.9664960280157325, "No": 0.033503914373735955}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5424674474870708, "res": {"Yes": 0.5424674474870708, "No": 0.45753234377019775}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9258975292651973, "res": {"Yes": 0.9258975292651973, "No": 0.07410220426177967}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9390950689599666, "res": {"Yes": 0.9390950689599666, "No": 0.06090487854521033}, "ground_truth": 1}, {"key": "35807797", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6469306094006889, "res": {"Yes": 0.6469306094006889, "No": 0.3530689907241754}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8963014346357975, "res": {"Yes": 0.8963014346357975, "No": 0.1036982333555767}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9658849743802841, "res": {"Yes": 0.9658849743802841, "No": 0.03411487748352369}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960960640491323, "res": {"Yes": 0.9960960640491323, "No": 0.0039039203999352693}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9474295200156782, "res": {"Yes": 0.9474295200156782, "No": 0.05257008222747084}, "ground_truth": 1}, {"key": "34188172", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999059975285092, "res": {"Yes": 0.9999059975285092, "No": 9.396990373429642e-05}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991454832075968, "res": {"Yes": 0.9991454832075968, "No": 0.0008544891952649842}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7702675322490328, "res": {"Yes": 0.7702675322490328, "No": 0.22973222211507702}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8488327148104282, "res": {"Yes": 0.8488327148104282, "No": 0.15116725592985106}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7418352985966027, "res": {"Yes": 0.7418352985966027, "No": 0.2581644226383958}, "ground_truth": 1}, {"key": "37075567", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8447698378247201, "res": {"Yes": 0.8447698378247201, "No": 0.15522995202521114}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9379305005042557, "res": {"Yes": 0.9379305005042557, "No": 0.06206934817882919}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9519772733568902, "res": {"Yes": 0.9519772733568902, "No": 0.04802239569059506}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9951408018878846, "res": {"Yes": 0.9951408018878846, "No": 0.0048590355853004325}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9706969144058266, "res": {"Yes": 0.9706969144058266, "No": 0.02930289389760467}, "ground_truth": 1}, {"key": "35559735", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9980687087475996, "res": {"Yes": 0.9980687087475996, "No": 0.0019312805474373986}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984231809874922, "res": {"Yes": 0.9984231809874922, "No": 0.0015768195557931712}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.05400507258880111, "res": {"No": 0.9459948253281777, "Yes": 0.05400507258880111}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9921925169701081, "res": {"Yes": 0.9921925169701081, "No": 0.007807370933777777}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9773394090853327, "res": {"Yes": 0.9773394090853327, "No": 0.0226605217158482}, "ground_truth": 1}, {"key": "33005019", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.996015164212986, "res": {"Yes": 0.996015164212986, "No": 0.003984787074147446}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7043260576316601, "res": {"Yes": 0.7043260576316601, "No": 0.29567372885006493}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4677503806681729, "res": {"No": 0.5322492416552901, "Yes": 0.4677503806681729}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9848425049954461, "res": {"Yes": 0.9848425049954461, "No": 0.015157353276388863}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9873160391788818, "res": {"Yes": 0.9873160391788818, "No": 0.012683714896134015}, "ground_truth": 1}, {"key": "30808252", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9785198443240892, "res": {"Yes": 0.9785198443240892, "No": 0.021480135525082378}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9442922848296107, "res": {"Yes": 0.9442922848296107, "No": 0.05570741594876293}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.23375210628710807, "res": {"No": 0.7662478481674488, "Yes": 0.23375210628710807}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8148161206755757, "res": {"Yes": 0.8148161206755757, "No": 0.1851838802953805}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9893123946740177, "res": {"Yes": 0.9893123946740177, "No": 0.01068746200300462}, "ground_truth": 1}, {"key": "15159017", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9953301881358418, "res": {"Yes": 0.9953301881358418, "No": 0.0046697861529935705}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9892947801236445, "res": {"Yes": 0.9892947801236445, "No": 0.010705100166516577}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9990732460019923, "res": {"Yes": 0.9990732460019923, "No": 0.0009266944114335391}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9918815014658217, "res": {"Yes": 0.9918815014658217, "No": 0.008118377175631998}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9994491455460549, "res": {"Yes": 0.9994491455460549, "No": 0.0005507736674927328}, "ground_truth": 1}, {"key": "24493400", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997024778684902, "res": {"Yes": 0.9997024778684902, "No": 0.0002974269230377975}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992504521514384, "res": {"Yes": 0.9992504521514384, "No": 0.0007493368069650733}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9077448609582992, "res": {"Yes": 0.9077448609582992, "No": 0.09225493514367762}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978750770520927, "res": {"Yes": 0.9978750770520927, "No": 0.0021249287639812774}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9708817167717521, "res": {"Yes": 0.9708817167717521, "No": 0.029118135088353505}, "ground_truth": 1}, {"key": "37791071", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.992042669513345, "res": {"Yes": 0.992042669513345, "No": 0.007957304238422863}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9726938614134983, "res": {"Yes": 0.9726938614134983, "No": 0.02730597872097706}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9743459729924838, "res": {"Yes": 0.9743459729924838, "No": 0.025653930069971593}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9678009780813551, "res": {"Yes": 0.9678009780813551, "No": 0.03219898656705139}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9685411390105382, "res": {"Yes": 0.9685411390105382, "No": 0.0314587850807935}, "ground_truth": 1}, {"key": "33528627", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9927869299651122, "res": {"Yes": 0.9927869299651122, "No": 0.0072130124457580585}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9968740017387746, "res": {"Yes": 0.9968740017387746, "No": 0.003125978264943958}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.980786511935172, "res": {"Yes": 0.980786511935172, "No": 0.019213475445642517}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8147492433130752, "res": {"Yes": 0.8147492433130752, "No": 0.1852506511189448}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989505930130371, "res": {"Yes": 0.9989505930130371, "No": 0.001049376605683988}, "ground_truth": 1}, {"key": "39925662", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9823274854345034, "res": {"Yes": 0.9823274854345034, "No": 0.017672456170825794}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9334356896666594, "res": {"Yes": 0.9334356896666594, "No": 0.06656395927090134}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.977775957990373, "res": {"Yes": 0.977775957990373, "No": 0.022223926504690118}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953731828495659, "res": {"Yes": 0.9953731828495659, "No": 0.004626667650164654}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9814835259214642, "res": {"Yes": 0.9814835259214642, "No": 0.018516310253234865}, "ground_truth": 1}, {"key": "29213416", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9761810165396788, "res": {"Yes": 0.9761810165396788, "No": 0.023818890372455864}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974027523990303, "res": {"Yes": 0.9974027523990303, "No": 0.002597216459365481}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8059029686603497, "res": {"Yes": 0.8059029686603497, "No": 0.19409669815555644}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8812284491675646, "res": {"Yes": 0.8812284491675646, "No": 0.11877142871620067}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9799973895946457, "res": {"Yes": 0.9799973895946457, "No": 0.02000264628361791}, "ground_truth": 1}, {"key": "34492745", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8824060012606132, "res": {"Yes": 0.8824060012606132, "No": 0.11759389007639746}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8971928640011131, "res": {"Yes": 0.8971928640011131, "No": 0.10280711667512474}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.22299601001310554, "res": {"No": 0.7770035187414259, "Yes": 0.22299601001310554}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5535184633152492, "res": {"Yes": 0.5535184633152492, "No": 0.4464815057572137}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9717868807985978, "res": {"Yes": 0.9717868807985978, "No": 0.028213034260614285}, "ground_truth": 1}, {"key": "34191937", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9282146305661967, "res": {"Yes": 0.9282146305661967, "No": 0.07178541193240393}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8293645863463448, "res": {"Yes": 0.8293645863463448, "No": 0.1706353594774215}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.14471161196727486, "res": {"No": 0.8552879099028371, "Yes": 0.14471161196727486}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7576735719188599, "res": {"Yes": 0.7576735719188599, "No": 0.24232596411700266}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9567198414921599, "res": {"Yes": 0.9567198414921599, "No": 0.04327993700357832}, "ground_truth": 1}, {"key": "34933372", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7691992040841823, "res": {"Yes": 0.7691992040841823, "No": 0.23080047183604513}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9153741593208128, "res": {"Yes": 0.9153741593208128, "No": 0.08462562180560262}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.007704536251625095, "res": {"No": 0.9922953186987966, "Yes": 0.007704536251625095}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.11176385176674955, "res": {"No": 0.8882359072092466, "Yes": 0.11176385176674955}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.1629780907151621, "res": {"No": 0.8370217275403637, "Yes": 0.1629780907151621}, "ground_truth": 1}, {"key": "38714379", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8621454560814336, "res": {"Yes": 0.8621454560814336, "No": 0.13785449970591307}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5524621028883815, "res": {"Yes": 0.5524621028883815, "No": 0.44753741760632904}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7907427779522168, "res": {"Yes": 0.7907427779522168, "No": 0.20925679437673136}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7763690825720114, "res": {"Yes": 0.7763690825720114, "No": 0.22363055427737233}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6332763501442604, "res": {"Yes": 0.6332763501442604, "No": 0.36672339763833334}, "ground_truth": 1}, {"key": "39220660", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7913417709546575, "res": {"Yes": 0.7913417709546575, "No": 0.20865775607038886}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8540607089130332, "res": {"Yes": 0.8540607089130332, "No": 0.1459387596093534}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.018841937036842563, "res": {"No": 0.9811578471776428, "Yes": 0.018841937036842563}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5246542000384931, "res": {"Yes": 0.5246542000384931, "No": 0.47534541012674286}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3134098562904566, "res": {"No": 0.6865899056525412, "Yes": 0.3134098562904566}, "ground_truth": 1}, {"key": "41028780", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.27376316710348014, "res": {"No": 0.7262366740273923, "Yes": 0.27376316710348014}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9025479906079383, "res": {"Yes": 0.9025479906079383, "No": 0.09745175928351404}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.13374981184143767, "res": {"No": 0.8662497474691252, "Yes": 0.13374981184143767}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9118341121727941, "res": {"Yes": 0.9118341121727941, "No": 0.0881658128958257}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8938074744183664, "res": {"Yes": 0.8938074744183664, "No": 0.10619242504267391}, "ground_truth": 1}, {"key": "39457108", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9130184960948348, "res": {"Yes": 0.9130184960948348, "No": 0.08698131095614306}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9198902863663392, "res": {"Yes": 0.9198902863663392, "No": 0.08010943833114098}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9566356194115363, "res": {"Yes": 0.9566356194115363, "No": 0.043364042287860206}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9933385230673483, "res": {"Yes": 0.9933385230673483, "No": 0.0066614260215489204}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9984022673360012, "res": {"Yes": 0.9984022673360012, "No": 0.0015977117973497116}, "ground_truth": 1}, {"key": "38288018", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9838347372344974, "res": {"Yes": 0.9838347372344974, "No": 0.016165208009401938}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914111755160482, "res": {"Yes": 0.9914111755160482, "No": 0.008588583994971893}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9757641708434692, "res": {"Yes": 0.9757641708434692, "No": 0.024235736989916826}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9757959539516589, "res": {"Yes": 0.9757959539516589, "No": 0.02420400412005514}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9227048205642826, "res": {"Yes": 0.9227048205642826, "No": 0.077294768774969}, "ground_truth": 1}, {"key": "40106293", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9683890832091505, "res": {"Yes": 0.9683890832091505, "No": 0.031610700256329526}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9023448002906294, "res": {"Yes": 0.9023448002906294, "No": 0.09765492379917724}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9287507550277756, "res": {"Yes": 0.9287507550277756, "No": 0.0712488927717314}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.921484456151385, "res": {"Yes": 0.921484456151385, "No": 0.07851500703849477}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9801017031281133, "res": {"Yes": 0.9801017031281133, "No": 0.019898144356831897}, "ground_truth": 1}, {"key": "39948797", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9774247032625567, "res": {"Yes": 0.9774247032625567, "No": 0.022575263670295203}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9449006871254417, "res": {"Yes": 0.9449006871254417, "No": 0.055099149418127394}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.04337876687957495, "res": {"No": 0.9566209906514961, "Yes": 0.04337876687957495}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9908487191973173, "res": {"Yes": 0.9908487191973173, "No": 0.009151232585219132}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9990906190377165, "res": {"Yes": 0.9990906190377165, "No": 0.0009092680707024262}, "ground_truth": 1}, {"key": "31853399", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9921209338545575, "res": {"Yes": 0.9921209338545575, "No": 0.007879002259056063}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9893680608912594, "res": {"Yes": 0.9893680608912594, "No": 0.01063190705090322}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9163998097579426, "res": {"Yes": 0.9163998097579426, "No": 0.08359936956570492}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971338610413818, "res": {"Yes": 0.9971338610413818, "No": 0.0028659700492417654}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9960219060632466, "res": {"Yes": 0.9960219060632466, "No": 0.003977996414034937}, "ground_truth": 1}, {"key": "35273252", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9959628956050984, "res": {"Yes": 0.9959628956050984, "No": 0.004036825351496378}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9869286509165184, "res": {"Yes": 0.9869286509165184, "No": 0.013071081604843812}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9281684205511063, "res": {"Yes": 0.9281684205511063, "No": 0.0718311394408159}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9849863468225223, "res": {"Yes": 0.9849863468225223, "No": 0.01501338713849617}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966545267082326, "res": {"Yes": 0.9966545267082326, "No": 0.003345433491255969}, "ground_truth": 1}, {"key": "37130459", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9991223923230754, "res": {"Yes": 0.9991223923230754, "No": 0.0008775703806250998}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989367945039012, "res": {"Yes": 0.9989367945039012, "No": 0.0010631136191102083}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8893628815815638, "res": {"Yes": 0.8893628815815638, "No": 0.11063692332423904}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952116319628125, "res": {"Yes": 0.9952116319628125, "No": 0.00478835188920957}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9901973646404841, "res": {"Yes": 0.9901973646404841, "No": 0.009802557087944048}, "ground_truth": 1}, {"key": "21734003", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.993793492433977, "res": {"Yes": 0.993793492433977, "No": 0.0062064562381741175}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956946606607855, "res": {"Yes": 0.9956946606607855, "No": 0.004305340715928157}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6819904869429246, "res": {"Yes": 0.6819904869429246, "No": 0.3180091255231567}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8214583933148718, "res": {"Yes": 0.8214583933148718, "No": 0.17854146639627055}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9869473504308166, "res": {"Yes": 0.9869473504308166, "No": 0.013052533840993917}, "ground_truth": 1}, {"key": "33990737", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9736469542664732, "res": {"Yes": 0.9736469542664732, "No": 0.026352904035277284}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.814465246792109, "res": {"Yes": 0.814465246792109, "No": 0.1855346540679131}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.0028731522701189055, "res": {"No": 0.9971267438244221, "Yes": 0.0028731522701189055}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9914629731459847, "res": {"Yes": 0.9914629731459847, "No": 0.008536966884648247}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9530672987836568, "res": {"Yes": 0.9530672987836568, "No": 0.04693269147399697}, "ground_truth": 1}, {"key": "34559912", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9692343878645794, "res": {"Yes": 0.9692343878645794, "No": 0.030765535278421192}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7781155297543081, "res": {"Yes": 0.7781155297543081, "No": 0.22188434529500609}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9728118553623233, "res": {"Yes": 0.9728118553623233, "No": 0.027187885253226848}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.749418909565907, "res": {"Yes": 0.749418909565907, "No": 0.2505809141587857}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9726399620492654, "res": {"Yes": 0.9726399620492654, "No": 0.027359936695265496}, "ground_truth": 1}, {"key": "39820439", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9770057740278112, "res": {"Yes": 0.9770057740278112, "No": 0.02299407051467175}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8991881394887864, "res": {"Yes": 0.8991881394887864, "No": 0.10081186721167298}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9947763801002648, "res": {"Yes": 0.9947763801002648, "No": 0.005223614415160591}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9329820008633811, "res": {"Yes": 0.9329820008633811, "No": 0.067017868574916}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.909453430266603, "res": {"Yes": 0.909453430266603, "No": 0.09054650435327906}, "ground_truth": 1}, {"key": "34759328", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9759077748649753, "res": {"Yes": 0.9759077748649753, "No": 0.024092179336713027}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.17915769201179282, "res": {"No": 0.8208420379919402, "Yes": 0.17915769201179282}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.985036321372547, "res": {"Yes": 0.985036321372547, "No": 0.014963610379866085}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9978391087942536, "res": {"Yes": 0.9978391087942536, "No": 0.00216092480165782}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9916577701381692, "res": {"Yes": 0.9916577701381692, "No": 0.008342182551713392}, "ground_truth": 1}, {"key": "36939137", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9887883325044259, "res": {"Yes": 0.9887883325044259, "No": 0.011211525133416016}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9938223377087099, "res": {"Yes": 0.9938223377087099, "No": 0.006177629853041828}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9667316063010153, "res": {"Yes": 0.9667316063010153, "No": 0.033268289598391926}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9592943729144988, "res": {"Yes": 0.9592943729144988, "No": 0.04070545768781005}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9667555622070318, "res": {"Yes": 0.9667555622070318, "No": 0.03324425196691674}, "ground_truth": 1}, {"key": "35851522", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946794111403656, "res": {"Yes": 0.9946794111403656, "No": 0.005320535621305619}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9885202233070609, "res": {"Yes": 0.9885202233070609, "No": 0.011479716310786696}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9934532287008966, "res": {"Yes": 0.9934532287008966, "No": 0.0065466834570808395}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994149681379139, "res": {"Yes": 0.9994149681379139, "No": 0.0005849598766544957}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997931433077965, "res": {"Yes": 0.9997931433077965, "No": 0.00020677568491981227}, "ground_truth": 1}, {"key": "22412782", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994572432457648, "res": {"Yes": 0.9994572432457648, "No": 0.000542746064645755}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996766260266379, "res": {"Yes": 0.9996766260266379, "No": 0.00032325599321151135}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8418498130068914, "res": {"Yes": 0.8418498130068914, "No": 0.1581499255478178}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9118971585627984, "res": {"Yes": 0.9118971585627984, "No": 0.08810262947536382}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9743663448310537, "res": {"Yes": 0.9743663448310537, "No": 0.025633338232854495}, "ground_truth": 1}, {"key": "38579227", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9773561422565995, "res": {"Yes": 0.9773561422565995, "No": 0.022643539489300854}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6081166602563963, "res": {"Yes": 0.6081166602563963, "No": 0.39188294244425864}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.962147089804276, "res": {"Yes": 0.962147089804276, "No": 0.03785266091441385}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9914530070102686, "res": {"Yes": 0.9914530070102686, "No": 0.008546754147356716}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9939817858380908, "res": {"Yes": 0.9939817858380908, "No": 0.006018182352822342}, "ground_truth": 1}, {"key": "37206995", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9983870557949379, "res": {"Yes": 0.9983870557949379, "No": 0.0016128862549911172}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9929154857293163, "res": {"Yes": 0.9929154857293163, "No": 0.007084356044139166}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.952911504076658, "res": {"Yes": 0.952911504076658, "No": 0.04708819247217052}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963454567575243, "res": {"Yes": 0.9963454567575243, "No": 0.0036543453961535247}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973595855425678, "res": {"Yes": 0.9973595855425678, "No": 0.002640267608777557}, "ground_truth": 1}, {"key": "38700847", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9936504656322929, "res": {"Yes": 0.9936504656322929, "No": 0.006349382594609219}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9889398709834011, "res": {"Yes": 0.9889398709834011, "No": 0.011059799626555811}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9986509151342747, "res": {"Yes": 0.9986509151342747, "No": 0.0013488835886926612}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9907088777055997, "res": {"Yes": 0.9907088777055997, "No": 0.009290876331322697}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998863309608148, "res": {"Yes": 0.9998863309608148, "No": 0.0001136081853654136}, "ground_truth": 1}, {"key": "20246590", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996041982501873, "res": {"Yes": 0.9996041982501873, "No": 0.0003957366473049141}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9930965119140074, "res": {"Yes": 0.9930965119140074, "No": 0.006903445114687469}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8976672455170128, "res": {"Yes": 0.8976672455170128, "No": 0.10233233252467934}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7688319068815549, "res": {"Yes": 0.7688319068815549, "No": 0.23116771336751385}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7796642661282064, "res": {"Yes": 0.7796642661282064, "No": 0.22033510213204927}, "ground_truth": 1}, {"key": "39141360", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.663376657002751, "res": {"Yes": 0.663376657002751, "No": 0.3366230906180473}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9286073391228208, "res": {"Yes": 0.9286073391228208, "No": 0.07139240413212386}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9760695937884659, "res": {"Yes": 0.9760695937884659, "No": 0.023930279433721672}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9906218314082795, "res": {"Yes": 0.9906218314082795, "No": 0.009378044623581822}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9943458581696125, "res": {"Yes": 0.9943458581696125, "No": 0.005654034035546959}, "ground_truth": 1}, {"key": "37906226", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9580436637718275, "res": {"Yes": 0.9580436637718275, "No": 0.04195611449316211}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8818162306115791, "res": {"Yes": 0.8818162306115791, "No": 0.11818335554001226}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8662973405376208, "res": {"Yes": 0.8662973405376208, "No": 0.13370259311983237}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976097319510203, "res": {"Yes": 0.9976097319510203, "No": 0.0023902543406948786}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999844030374527, "res": {"Yes": 0.999844030374527, "No": 0.0001558767386338464}, "ground_truth": 1}, {"key": "16201033", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998496284069415, "res": {"Yes": 0.9998496284069415, "No": 0.00015030965876950208}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9995769227624253, "res": {"Yes": 0.9995769227624253, "No": 0.00042295955421607813}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7138424066372665, "res": {"Yes": 0.7138424066372665, "No": 0.2861575344650397}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9137425876152734, "res": {"Yes": 0.9137425876152734, "No": 0.0862572990486747}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8054785952214971, "res": {"Yes": 0.8054785952214971, "No": 0.19452137982211715}, "ground_truth": 1}, {"key": "36469022", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8952085017408246, "res": {"Yes": 0.8952085017408246, "No": 0.1047914598915603}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7452448178654706, "res": {"Yes": 0.7452448178654706, "No": 0.25475512227899877}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8803151593501235, "res": {"Yes": 0.8803151593501235, "No": 0.11968432827625955}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9259335010829574, "res": {"Yes": 0.9259335010829574, "No": 0.07406608824119736}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998609511181921, "res": {"Yes": 0.9998609511181921, "No": 0.0001389788214654504}, "ground_truth": 1}, {"key": "31295270", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997309521398686, "res": {"Yes": 0.9997309521398686, "No": 0.00026901914766093665}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9962995401392367, "res": {"Yes": 0.9962995401392367, "No": 0.0037004531272725096}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8413361467475696, "res": {"Yes": 0.8413361467475696, "No": 0.15866355895955528}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9746714550018983, "res": {"Yes": 0.9746714550018983, "No": 0.02532842490212771}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8368948028196924, "res": {"Yes": 0.8368948028196924, "No": 0.16310510011156623}, "ground_truth": 1}, {"key": "35360689", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9090107360848351, "res": {"Yes": 0.9090107360848351, "No": 0.09098915094110724}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9514586698746555, "res": {"Yes": 0.9514586698746555, "No": 0.04854126587581203}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9832772931080368, "res": {"Yes": 0.9832772931080368, "No": 0.016722708379897738}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999746547351966, "res": {"Yes": 0.9999746547351966, "No": 2.5309493385107636e-05}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995974136996211, "res": {"Yes": 0.9995974136996211, "No": 0.00040257666789592105}, "ground_truth": 1}, {"key": "29202793", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999697353626656, "res": {"Yes": 0.999697353626656, "No": 0.0003025866122463538}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997764620483319, "res": {"Yes": 0.9997764620483319, "No": 0.00022351098393392482}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9357721140902042, "res": {"Yes": 0.9357721140902042, "No": 0.06422778331402143}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994833203949022, "res": {"Yes": 0.9994833203949022, "No": 0.0005166702506506276}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999254613878358, "res": {"Yes": 0.999254613878358, "No": 0.0007453232060791803}, "ground_truth": 1}, {"key": "35999008", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997890912243835, "res": {"Yes": 0.9997890912243835, "No": 0.00021080563714079344}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9920590978759786, "res": {"Yes": 0.9920590978759786, "No": 0.007940786128475538}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.05248787964198166, "res": {"No": 0.9475118173179068, "Yes": 0.05248787964198166}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996682845297464, "res": {"Yes": 0.9996682845297464, "No": 0.0003316023658759994}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9965491575901233, "res": {"Yes": 0.9965491575901233, "No": 0.0034508569067902115}, "ground_truth": 1}, {"key": "31797119", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9888166567518826, "res": {"Yes": 0.9888166567518826, "No": 0.011183184643322564}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.979405956448726, "res": {"Yes": 0.979405956448726, "No": 0.020594004278354068}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8968068894451139, "res": {"Yes": 0.8968068894451139, "No": 0.10319305754998445}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6257496925504866, "res": {"Yes": 0.6257496925504866, "No": 0.37424990856414914}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8512017708338593, "res": {"Yes": 0.8512017708338593, "No": 0.14879785083509273}, "ground_truth": 1}, {"key": "26711893", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9788433220727494, "res": {"Yes": 0.9788433220727494, "No": 0.021156527336340016}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8590955750777799, "res": {"Yes": 0.8590955750777799, "No": 0.140904150986064}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9964311379806612, "res": {"Yes": 0.9964311379806612, "No": 0.003568881634200955}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.977370611144028, "res": {"Yes": 0.977370611144028, "No": 0.02262928081501747}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9578741021761071, "res": {"Yes": 0.9578741021761071, "No": 0.04212572836058916}, "ground_truth": 1}, {"key": "35348288", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9900721942771686, "res": {"Yes": 0.9900721942771686, "No": 0.00992767765379371}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9371902891105214, "res": {"Yes": 0.9371902891105214, "No": 0.06280963460488323}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8159025052634556, "res": {"Yes": 0.8159025052634556, "No": 0.18409727363599337}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9132493947955855, "res": {"Yes": 0.9132493947955855, "No": 0.08675045631966102}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9384998770148139, "res": {"Yes": 0.9384998770148139, "No": 0.06149994490645907}, "ground_truth": 1}, {"key": "38124131", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9080129031174851, "res": {"Yes": 0.9080129031174851, "No": 0.09198661515160331}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9014460435391587, "res": {"Yes": 0.9014460435391587, "No": 0.09855376915461078}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9125083982556568, "res": {"Yes": 0.9125083982556568, "No": 0.08749130167400124}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9679099595748855, "res": {"Yes": 0.9679099595748855, "No": 0.0320899205407777}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9828159139557907, "res": {"Yes": 0.9828159139557907, "No": 0.017184050875982564}, "ground_truth": 1}, {"key": "20285901", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8779855880529678, "res": {"Yes": 0.8779855880529678, "No": 0.12201434002106873}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8626111489772658, "res": {"Yes": 0.8626111489772658, "No": 0.13738860990137092}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7648550807722932, "res": {"Yes": 0.7648550807722932, "No": 0.23514470226917009}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983628142531418, "res": {"Yes": 0.9983628142531418, "No": 0.0016371463225700697}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9825917837764322, "res": {"Yes": 0.9825917837764322, "No": 0.017408263007422986}, "ground_truth": 1}, {"key": "35633632", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6221990259582356, "res": {"Yes": 0.6221990259582356, "No": 0.3778005001753153}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9846862100902954, "res": {"Yes": 0.9846862100902954, "No": 0.01531376778472415}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9999323394050813, "res": {"Yes": 0.9999323394050813, "No": 6.761065290357124e-05}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9906676938028786, "res": {"Yes": 0.9906676938028786, "No": 0.00933204875817518}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9937279147614267, "res": {"Yes": 0.9937279147614267, "No": 0.006272043878651721}, "ground_truth": 1}, {"key": "10741274", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998502243374946, "res": {"Yes": 0.9998502243374946, "No": 0.00014965019808668316}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997087938087562, "res": {"Yes": 0.9997087938087562, "No": 0.00029108026618852834}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.15422409067456616, "res": {"No": 0.8457753063361033, "Yes": 0.15422409067456616}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9364605340038828, "res": {"Yes": 0.9364605340038828, "No": 0.06353930424975354}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9068284198736004, "res": {"Yes": 0.9068284198736004, "No": 0.09317138801225903}, "ground_truth": 1}, {"key": "30605795", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9535891697126166, "res": {"Yes": 0.9535891697126166, "No": 0.046410776247576974}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6296089919351936, "res": {"Yes": 0.6296089919351936, "No": 0.37039076834327017}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8741123628296028, "res": {"Yes": 0.8741123628296028, "No": 0.1258874909370916}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9452008182310562, "res": {"Yes": 0.9452008182310562, "No": 0.05479902922814219}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9959945821706842, "res": {"Yes": 0.9959945821706842, "No": 0.004005355253094536}, "ground_truth": 1}, {"key": "30539722", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9764439638521364, "res": {"Yes": 0.9764439638521364, "No": 0.023555910814792826}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9826424268794125, "res": {"Yes": 0.9826424268794125, "No": 0.017357501550664362}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6299992374462935, "res": {"Yes": 0.6299992374462935, "No": 0.3700001751372722}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999323394050813, "res": {"Yes": 0.9999323394050813, "No": 6.760578511180471e-05}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999651187283657, "res": {"Yes": 0.9999651187283657, "No": 3.4791376625802295e-05}, "ground_truth": 1}, {"key": "18639299", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9943761526703095, "res": {"Yes": 0.9943761526703095, "No": 0.005623747625703258}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988383099848774, "res": {"Yes": 0.9988383099848774, "No": 0.0011616004807790711}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9946131344774356, "res": {"Yes": 0.9946131344774356, "No": 0.005386775556849222}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949757812221797, "res": {"Yes": 0.9949757812221797, "No": 0.005024046973696383}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9972624763702492, "res": {"Yes": 0.9972624763702492, "No": 0.0027374183794774917}, "ground_truth": 1}, {"key": "39773552", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9796375692660833, "res": {"Yes": 0.9796375692660833, "No": 0.020362153943352147}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987142827435753, "res": {"Yes": 0.9987142827435753, "No": 0.001285539248531733}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9069470625552879, "res": {"Yes": 0.9069470625552879, "No": 0.09305283768619395}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989163317929337, "res": {"Yes": 0.9989163317929337, "No": 0.001083608720580603}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999766332054608, "res": {"Yes": 0.999766332054608, "No": 0.00023360147332222945}, "ground_truth": 1}, {"key": "34086410", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996065813694129, "res": {"Yes": 0.9996065813694129, "No": 0.0003933715393300133}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988681664073621, "res": {"Yes": 0.9988681664073621, "No": 0.0011318124422353623}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9072944045226405, "res": {"Yes": 0.9072944045226405, "No": 0.09270504361143851}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949898329663709, "res": {"Yes": 0.9949898329663709, "No": 0.005010185693506914}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9550216438891057, "res": {"Yes": 0.9550216438891057, "No": 0.044977977405120426}, "ground_truth": 1}, {"key": "35454652", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9934328799074718, "res": {"Yes": 0.9934328799074718, "No": 0.0065670339631230925}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9754540686093295, "res": {"Yes": 0.9754540686093295, "No": 0.02454581518911995}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.0022275521210342905, "res": {"No": 0.9977724135613704, "Yes": 0.0022275521210342905}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9938856889450599, "res": {"Yes": 0.9938856889450599, "No": 0.006114229618721199}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9811086099877099, "res": {"Yes": 0.9811086099877099, "No": 0.018891371415964156}, "ground_truth": 1}, {"key": "36158310", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932498470270253, "res": {"Yes": 0.9932498470270253, "No": 0.0067500524753634425}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.991715568612304, "res": {"Yes": 0.991715568612304, "No": 0.008284347400542}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.08936453283136689, "res": {"No": 0.910635301078011, "Yes": 0.08936453283136689}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9964551697980683, "res": {"Yes": 0.9964551697980683, "No": 0.0035448586246671883}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987689815256479, "res": {"Yes": 0.9987689815256479, "No": 0.0012310148112100613}, "ground_truth": 1}, {"key": "35688387", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9904287280103491, "res": {"Yes": 0.9904287280103491, "No": 0.009571107059587816}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996579209223942, "res": {"Yes": 0.9996579209223942, "No": 0.00034206116221674904}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9780833202954957, "res": {"Yes": 0.9780833202954957, "No": 0.021916581117149808}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.972029770406985, "res": {"Yes": 0.972029770406985, "No": 0.02797015584279264}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.983190859950127, "res": {"Yes": 0.983190859950127, "No": 0.01680898523718085}, "ground_truth": 1}, {"key": "34209292", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6624612390021082, "res": {"Yes": 0.6624612390021082, "No": 0.33753863538720247}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9475868490118142, "res": {"Yes": 0.9475868490118142, "No": 0.05241300664710723}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7942108887829799, "res": {"Yes": 0.7942108887829799, "No": 0.20578887795935907}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8704233817652007, "res": {"Yes": 0.8704233817652007, "No": 0.12957642726435303}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9876795514319469, "res": {"Yes": 0.9876795514319469, "No": 0.01232040189575678}, "ground_truth": 1}, {"key": "25037859", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9905243892267828, "res": {"Yes": 0.9905243892267828, "No": 0.009475535618067871}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9955280439066361, "res": {"Yes": 0.9955280439066361, "No": 0.00447195010675273}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6505347578133045, "res": {"Yes": 0.6505347578133045, "No": 0.349465022849574}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8392580846365725, "res": {"Yes": 0.8392580846365725, "No": 0.160741386580977}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9798355309137747, "res": {"Yes": 0.9798355309137747, "No": 0.020164426327154834}, "ground_truth": 1}, {"key": "36412121", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9945737478809547, "res": {"Yes": 0.9945737478809547, "No": 0.005426201117035675}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9867975792117892, "res": {"Yes": 0.9867975792117892, "No": 0.013202025687361792}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6757937182667715, "res": {"Yes": 0.6757937182667715, "No": 0.32420608518754634}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9953293649981166, "res": {"Yes": 0.9953293649981166, "No": 0.0046706188500973906}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9943377264424351, "res": {"Yes": 0.9943377264424351, "No": 0.005662277967382705}, "ground_truth": 1}, {"key": "34909172", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9845910792277157, "res": {"Yes": 0.9845910792277157, "No": 0.015408861699952064}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5710771126617222, "res": {"Yes": 0.5710771126617222, "No": 0.42892281270796434}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9508058887943263, "res": {"Yes": 0.9508058887943263, "No": 0.049193907227092666}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9910353185336285, "res": {"Yes": 0.9910353185336285, "No": 0.008964493240131209}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7781560472848073, "res": {"Yes": 0.7781560472848073, "No": 0.22184338922522615}, "ground_truth": 1}, {"key": "39011806", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9933531182974913, "res": {"Yes": 0.9933531182974913, "No": 0.0066466753280536766}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9320108199271865, "res": {"Yes": 0.9320108199271865, "No": 0.06798862956955347}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9045205044756777, "res": {"Yes": 0.9045205044756777, "No": 0.09547914808459937}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9956125282001739, "res": {"Yes": 0.9956125282001739, "No": 0.004387500395709312}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.962881513206064, "res": {"Yes": 0.962881513206064, "No": 0.03711826917425438}, "ground_truth": 1}, {"key": "33096163", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.983151223409587, "res": {"Yes": 0.983151223409587, "No": 0.01684881417552482}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.802110010759156, "res": {"Yes": 0.802110010759156, "No": 0.19788965531971817}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6407270383265284, "res": {"Yes": 0.6407270383265284, "No": 0.35927272472647137}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9430134740090652, "res": {"Yes": 0.9430134740090652, "No": 0.05698645904406625}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9071162339345433, "res": {"Yes": 0.9071162339345433, "No": 0.09288365411899047}, "ground_truth": 1}, {"key": "38762205", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9717631325609857, "res": {"Yes": 0.9717631325609857, "No": 0.028236788344564483}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9914691817065616, "res": {"Yes": 0.9914691817065616, "No": 0.008530736319870209}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.24849772212163537, "res": {"No": 0.7515017808986247, "Yes": 0.24849772212163537}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9363065738202955, "res": {"Yes": 0.9363065738202955, "No": 0.06369312818562071}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9989373861743397, "res": {"Yes": 0.9989373861743397, "No": 0.0010625750758908523}, "ground_truth": 1}, {"key": "35519177", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956957214744421, "res": {"Yes": 0.9956957214744421, "No": 0.0043040966522371145}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9915097336253745, "res": {"Yes": 0.9915097336253745, "No": 0.008490019345835945}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.12720510439369592, "res": {"No": 0.8727947147435499, "Yes": 0.12720510439369592}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9960773763665663, "res": {"Yes": 0.9960773763665663, "No": 0.003922580307565251}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9966455338353445, "res": {"Yes": 0.9966455338353445, "No": 0.0033544760186355093}, "ground_truth": 1}, {"key": "36192531", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9984179479657876, "res": {"Yes": 0.9984179479657876, "No": 0.001582022796209462}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981231011816422, "res": {"Yes": 0.9981231011816422, "No": 0.0018768827312894973}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9998909793831794, "res": {"Yes": 0.9998909793831794, "No": 0.00010880437640943335}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992785342816365, "res": {"Yes": 0.9992785342816365, "No": 0.0007212637328331294}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999730475458267, "res": {"Yes": 0.999730475458267, "No": 0.00026949249927668426}, "ground_truth": 1}, {"key": "33160852", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999726283256111, "res": {"Yes": 0.9999726283256111, "No": 2.7351751251848324e-05}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999233997739472, "res": {"Yes": 0.9999233997739472, "No": 7.657706029688541e-05}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9585728664984015, "res": {"Yes": 0.9585728664984015, "No": 0.04142695181258958}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9252240449881607, "res": {"Yes": 0.9252240449881607, "No": 0.07477583557537307}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9957029329263892, "res": {"Yes": 0.9957029329263892, "No": 0.0042970566098642264}, "ground_truth": 1}, {"key": "36312304", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9902676122283004, "res": {"Yes": 0.9902676122283004, "No": 0.009732310008657786}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9718230261120324, "res": {"Yes": 0.9718230261120324, "No": 0.028176761529475174}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9880853335243726, "res": {"Yes": 0.9880853335243726, "No": 0.011914573074509263}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976687009086275, "res": {"Yes": 0.9976687009086275, "No": 0.0023312748852291395}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973099039106703, "res": {"Yes": 0.9973099039106703, "No": 0.002690053223167486}, "ground_truth": 1}, {"key": "33773343", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9958594388592726, "res": {"Yes": 0.9958594388592726, "No": 0.004140538612373534}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9990921673296485, "res": {"Yes": 0.9990921673296485, "No": 0.0009077998801176729}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9189095166144303, "res": {"Yes": 0.9189095166144303, "No": 0.08109035112254186}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.993866375945998, "res": {"Yes": 0.993866375945998, "No": 0.006133569489829556}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9820202054742425, "res": {"Yes": 0.9820202054742425, "No": 0.017979745583697162}, "ground_truth": 1}, {"key": "34913320", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995243995837504, "res": {"Yes": 0.9995243995837504, "No": 0.00047557022511308673}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9492970306838429, "res": {"Yes": 0.9492970306838429, "No": 0.05070279204399544}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9215126954752385, "res": {"Yes": 0.9215126954752385, "No": 0.07848717840141696}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7952906290504574, "res": {"Yes": 0.7952906290504574, "No": 0.2047092907894944}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973417962969696, "res": {"Yes": 0.9973417962969696, "No": 0.002658178939103806}, "ground_truth": 1}, {"key": "33784155", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9810263504227453, "res": {"Yes": 0.9810263504227453, "No": 0.018973633008658473}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9847850489900851, "res": {"Yes": 0.9847850489900851, "No": 0.015214920815765231}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9725830039756136, "res": {"Yes": 0.9725830039756136, "No": 0.027416630421987945}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976545818015469, "res": {"Yes": 0.9976545818015469, "No": 0.002345237975836075}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9950814195279707, "res": {"Yes": 0.9950814195279707, "No": 0.004918364118533881}, "ground_truth": 1}, {"key": "24085062", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9975359465757551, "res": {"Yes": 0.9975359465757551, "No": 0.0024639345405474927}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9645161908466573, "res": {"Yes": 0.9645161908466573, "No": 0.03548349216811557}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6238378500937632, "res": {"Yes": 0.6238378500937632, "No": 0.37616190118905696}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9828520694698398, "res": {"Yes": 0.9828520694698398, "No": 0.017147875338718505}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8812751995768366, "res": {"Yes": 0.8812751995768366, "No": 0.1187247369706282}, "ground_truth": 1}, {"key": "33893487", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.96218405235685, "res": {"Yes": 0.96218405235685, "No": 0.03781587465874301}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8807565807779008, "res": {"Yes": 0.8807565807779008, "No": 0.11924336382838792}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.05639811742638608, "res": {"No": 0.9436017545773464, "Yes": 0.05639811742638608}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.011940419949336097, "res": {"No": 0.9880594865382, "Yes": 0.011940419949336097}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8624579973388582, "res": {"Yes": 0.8624579973388582, "No": 0.13754196616079328}, "ground_truth": 1}, {"key": "40913011", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7880992462792789, "res": {"Yes": 0.7880992462792789, "No": 0.2119005114653178}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8721908886743719, "res": {"Yes": 0.8721908886743719, "No": 0.12780904051329198}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9438329908128688, "res": {"Yes": 0.9438329908128688, "No": 0.05616672383637722}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5962787618735127, "res": {"Yes": 0.5962787618735127, "No": 0.4037210539574418}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7024899808578488, "res": {"Yes": 0.7024899808578488, "No": 0.29750971472255605}, "ground_truth": 1}, {"key": "29642545", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9569334166768044, "res": {"Yes": 0.9569334166768044, "No": 0.043066420166767275}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.951612157294211, "res": {"Yes": 0.951612157294211, "No": 0.04838768208629727}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9985270523537857, "res": {"Yes": 0.9985270523537857, "No": 0.0014729751676412406}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985224139068429, "res": {"Yes": 0.9985224139068429, "No": 0.0014775433042152326}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983681660911884, "res": {"Yes": 0.9983681660911884, "No": 0.0016317617820101053}, "ground_truth": 1}, {"key": "35969159", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998747769110656, "res": {"Yes": 0.9998747769110656, "No": 0.00012520614738305893}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9852342626358329, "res": {"Yes": 0.9852342626358329, "No": 0.014765635850494962}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.37910343331947516, "res": {"No": 0.6208961756473278, "Yes": 0.37910343331947516}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9922010860777845, "res": {"Yes": 0.9922010860777845, "No": 0.007798865535743996}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948150715857413, "res": {"Yes": 0.9948150715857413, "No": 0.005184903753297153}, "ground_truth": 1}, {"key": "37081669", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9978880170810541, "res": {"Yes": 0.9978880170810541, "No": 0.0021120099541711715}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987737365762112, "res": {"Yes": 0.9987737365762112, "No": 0.0012262543496203294}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.07593547594687726, "res": {"No": 0.9240641992927507, "Yes": 0.07593547594687726}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9874968806967009, "res": {"Yes": 0.9874968806967009, "No": 0.01250299204145887}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993954413692561, "res": {"Yes": 0.9993954413692561, "No": 0.0006045212714654954}, "ground_truth": 1}, {"key": "40048022", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9850729910171743, "res": {"Yes": 0.9850729910171743, "No": 0.014926994407317404}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8462080479893699, "res": {"Yes": 0.8462080479893699, "No": 0.15379174453142014}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9584851304221756, "res": {"Yes": 0.9584851304221756, "No": 0.041514596113661616}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6238744267791493, "res": {"Yes": 0.6238744267791493, "No": 0.3761251293990311}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.6377859394752483, "res": {"Yes": 0.6377859394752483, "No": 0.3622133775726279}, "ground_truth": 1}, {"key": "32884004", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9799012310886963, "res": {"Yes": 0.9799012310886963, "No": 0.020098809389997734}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8132323068234912, "res": {"Yes": 0.8132323068234912, "No": 0.18676732279387523}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.983710138520317, "res": {"Yes": 0.983710138520317, "No": 0.016289802181438825}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9955710548329207, "res": {"Yes": 0.9955710548329207, "No": 0.004428900145073593}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9945828257948098, "res": {"Yes": 0.9945828257948098, "No": 0.005417190260874815}, "ground_truth": 1}, {"key": "39022490", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.991913046774828, "res": {"Yes": 0.991913046774828, "No": 0.008086890380394129}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9891892012938023, "res": {"Yes": 0.9891892012938023, "No": 0.01081073669775004}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9162045420997824, "res": {"Yes": 0.9162045420997824, "No": 0.08379512897277969}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9784520499252698, "res": {"Yes": 0.9784520499252698, "No": 0.021547908941875027}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9912083971521018, "res": {"Yes": 0.9912083971521018, "No": 0.008791531063762343}, "ground_truth": 1}, {"key": "35159385", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.977301040470633, "res": {"Yes": 0.977301040470633, "No": 0.022698732342428325}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8201808370908926, "res": {"Yes": 0.8201808370908926, "No": 0.17981896215847024}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.14706776676928812, "res": {"No": 0.8529315678265029, "Yes": 0.14706776676928812}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9819675617641482, "res": {"Yes": 0.9819675617641482, "No": 0.018032420541522667}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8962198390674748, "res": {"Yes": 0.8962198390674748, "No": 0.1037798033415015}, "ground_truth": 1}, {"key": "34363669", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8729697450877061, "res": {"Yes": 0.8729697450877061, "No": 0.12702983485434524}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9959872576534591, "res": {"Yes": 0.9959872576534591, "No": 0.004012682534887358}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.25871269984042644, "res": {"No": 0.74128689935768, "Yes": 0.25871269984042644}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8115584723268086, "res": {"Yes": 0.8115584723268086, "No": 0.18844113538274232}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9739727579317498, "res": {"Yes": 0.9739727579317498, "No": 0.02602697965341045}, "ground_truth": 1}, {"key": "36119687", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9600774122604532, "res": {"Yes": 0.9600774122604532, "No": 0.039922388386796785}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8862420159300664, "res": {"Yes": 0.8862420159300664, "No": 0.11375747602659887}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.13887085200767632, "res": {"No": 0.8611285608184628, "Yes": 0.13887085200767632}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.4756515645722312, "res": {"No": 0.5243476292626078, "Yes": 0.4756515645722312}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8985906567167594, "res": {"Yes": 0.8985906567167594, "No": 0.10140884123087876}, "ground_truth": 1}, {"key": "35217446", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.868894278431357, "res": {"Yes": 0.868894278431357, "No": 0.13110436415729362}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9400353387592557, "res": {"Yes": 0.9400353387592557, "No": 0.059964371587989514}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9026709431667181, "res": {"Yes": 0.9026709431667181, "No": 0.09732904739154351}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9581360130931926, "res": {"Yes": 0.9581360130931926, "No": 0.041863904127875384}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9763240678006796, "res": {"Yes": 0.9763240678006796, "No": 0.023675884038873992}, "ground_truth": 1}, {"key": "39049331", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9721939114909393, "res": {"Yes": 0.9721939114909393, "No": 0.02780592921254255}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9658470506171742, "res": {"Yes": 0.9658470506171742, "No": 0.03415285823437168}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9759891904392479, "res": {"Yes": 0.9759891904392479, "No": 0.024010604309267713}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9479074453317252, "res": {"Yes": 0.9479074453317252, "No": 0.05209241115735463}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9526676947473949, "res": {"Yes": 0.9526676947473949, "No": 0.04733220728904633}, "ground_truth": 1}, {"key": "36472242", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9782221358178177, "res": {"Yes": 0.9782221358178177, "No": 0.021777840870004332}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9706879306474563, "res": {"Yes": 0.9706879306474563, "No": 0.029312008514856342}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9632530241048789, "res": {"Yes": 0.9632530241048789, "No": 0.0367469077397345}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9965323516278757, "res": {"Yes": 0.9965323516278757, "No": 0.0034676089012358633}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9981891319037489, "res": {"Yes": 0.9981891319037489, "No": 0.0018107901174093616}, "ground_truth": 1}, {"key": "31854721", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998818091005264, "res": {"Yes": 0.998818091005264, "No": 0.001181914497210582}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951559056525089, "res": {"Yes": 0.9951559056525089, "No": 0.004844100529986174}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6520255374202851, "res": {"Yes": 0.6520255374202851, "No": 0.34797383110823965}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991411955837843, "res": {"Yes": 0.9991411955837843, "No": 0.0008587269981978595}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967855167496917, "res": {"Yes": 0.9967855167496917, "No": 0.0032144808878694694}, "ground_truth": 1}, {"key": "18725849", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997788456082386, "res": {"Yes": 0.9997788456082386, "No": 0.00022104395164457025}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981409228287161, "res": {"Yes": 0.9981409228287161, "No": 0.0018590456766425622}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.017375927633117804, "res": {"No": 0.9826240084023815, "Yes": 0.017375927633117804}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9650433619398385, "res": {"Yes": 0.9650433619398385, "No": 0.034956599543077255}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983005583890361, "res": {"Yes": 0.9983005583890361, "No": 0.0016993558695253847}, "ground_truth": 1}, {"key": "36883179", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99290162135002, "res": {"Yes": 0.99290162135002, "No": 0.0070983245617512935}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9702376184598133, "res": {"Yes": 0.9702376184598133, "No": 0.029762293662337253}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.729287754420637, "res": {"Yes": 0.729287754420637, "No": 0.27071206871438525}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971781885155844, "res": {"Yes": 0.9971781885155844, "No": 0.0028218347077473734}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995128423998156, "res": {"Yes": 0.9995128423998156, "No": 0.0004870764332847475}, "ground_truth": 1}, {"key": "34266359", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9772334028289824, "res": {"Yes": 0.9772334028289824, "No": 0.022766534644822867}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9932403260775993, "res": {"Yes": 0.9932403260775993, "No": 0.006759624079601075}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9858129275809295, "res": {"Yes": 0.9858129275809295, "No": 0.014186810273584656}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.992958618517679, "res": {"Yes": 0.992958618517679, "No": 0.0070413927117785775}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9932398523020767, "res": {"Yes": 0.9932398523020767, "No": 0.006760137830553414}, "ground_truth": 1}, {"key": "31920289", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932605486565034, "res": {"Yes": 0.9932605486565034, "No": 0.006739370203776798}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9920168568989078, "res": {"Yes": 0.9920168568989078, "No": 0.007983031918640198}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7544627065708593, "res": {"Yes": 0.7544627065708593, "No": 0.24553688466458087}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6910737717908884, "res": {"Yes": 0.6910737717908884, "No": 0.30892584954241054}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7970760854044677, "res": {"Yes": 0.7970760854044677, "No": 0.20292330078112344}, "ground_truth": 1}, {"key": "36292997", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9729997302806738, "res": {"Yes": 0.9729997302806738, "No": 0.027000154250841433}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.573005324493957, "res": {"Yes": 0.573005324493957, "No": 0.4269941782164377}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9108610445834622, "res": {"Yes": 0.9108610445834622, "No": 0.08913877981222797}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9932523155539377, "res": {"Yes": 0.9932523155539377, "No": 0.006747647185309369}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948237959532116, "res": {"Yes": 0.9948237959532116, "No": 0.005176215956456604}, "ground_truth": 1}, {"key": "30412533", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946703286646611, "res": {"Yes": 0.9946703286646611, "No": 0.005329648449081501}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9667769586779071, "res": {"Yes": 0.9667769586779071, "No": 0.03322292929589909}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7387879581435676, "res": {"Yes": 0.7387879581435676, "No": 0.26121148824129337}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9704122517192096, "res": {"Yes": 0.9704122517192096, "No": 0.029587532668820492}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9260627102284203, "res": {"Yes": 0.9260627102284203, "No": 0.07393725876033716}, "ground_truth": 1}, {"key": "40433191", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7518174027565957, "res": {"Yes": 0.7518174027565957, "No": 0.24818233033071946}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6545676419023831, "res": {"Yes": 0.6545676419023831, "No": 0.3454319555614396}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.946260639912909, "res": {"Yes": 0.946260639912909, "No": 0.053739273214126734}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8971360555475392, "res": {"Yes": 0.8971360555475392, "No": 0.10286377900545116}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996268345435552, "res": {"Yes": 0.9996268345435552, "No": 0.0003731395797029608}, "ground_truth": 1}, {"key": "34565591", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993027008261431, "res": {"Yes": 0.9993027008261431, "No": 0.0006972813988901308}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9861303335374413, "res": {"Yes": 0.9861303335374413, "No": 0.013869570656015639}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.07487065157694485, "res": {"No": 0.9251292421375777, "Yes": 0.07487065157694485}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6909688954832837, "res": {"Yes": 0.6909688954832837, "No": 0.3090309948984775}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.741998409016255, "res": {"Yes": 0.741998409016255, "No": 0.2580016238727244}, "ground_truth": 1}, {"key": "36062480", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.715051793821866, "res": {"Yes": 0.715051793821866, "No": 0.2849481343532941}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.3674502292915381, "res": {"No": 0.6325496774764108, "Yes": 0.3674502292915381}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9478585990238372, "res": {"Yes": 0.9478585990238372, "No": 0.05214102041464434}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.999217241875103, "res": {"Yes": 0.999217241875103, "No": 0.0007826914507347215}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969484039134672, "res": {"Yes": 0.9969484039134672, "No": 0.0030515570818310136}, "ground_truth": 1}, {"key": "37276883", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9920275340337975, "res": {"Yes": 0.9920275340337975, "No": 0.007972437488510864}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9817324352005853, "res": {"Yes": 0.9817324352005853, "No": 0.018267349372276188}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.00799427514776697, "res": {"No": 0.9920057215721862, "Yes": 0.00799427514776697}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8590111301332868, "res": {"Yes": 0.8590111301332868, "No": 0.14098869064673517}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9969834750815867, "res": {"Yes": 0.9969834750815867, "No": 0.003016454934078392}, "ground_truth": 1}, {"key": "38509260", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7581699879293105, "res": {"Yes": 0.7581699879293105, "No": 0.24182980146212965}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7916210768314366, "res": {"Yes": 0.7916210768314366, "No": 0.20837819606175234}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9853563486064056, "res": {"Yes": 0.9853563486064056, "No": 0.014643326153160493}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996071771750973, "res": {"Yes": 0.9996071771750973, "No": 0.0003927300805505639}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998514161896679, "res": {"Yes": 0.9998514161896679, "No": 0.00014849815857006287}, "ground_truth": 1}, {"key": "37139607", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9939087716118937, "res": {"Yes": 0.9939087716118937, "No": 0.006091140138027445}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8573426631364044, "res": {"Yes": 0.8573426631364044, "No": 0.14265680475596954}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8527226166616775, "res": {"Yes": 0.8527226166616775, "No": 0.14727706062353335}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9862450064533914, "res": {"Yes": 0.9862450064533914, "No": 0.013754778334536139}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9701091500408053, "res": {"Yes": 0.9701091500408053, "No": 0.029890672612860995}, "ground_truth": 1}, {"key": "37092824", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8271933950229484, "res": {"Yes": 0.8271933950229484, "No": 0.1728060865745769}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9791542138978969, "res": {"Yes": 0.9791542138978969, "No": 0.020845236959998095}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.16623979247130352, "res": {"No": 0.833760148260244, "Yes": 0.16623979247130352}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.4909810876273631, "res": {"No": 0.5090188345861523, "Yes": 0.4909810876273631}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.758257864923285, "res": {"Yes": 0.758257864923285, "No": 0.2417415980398387}, "ground_truth": 1}, {"key": "32191802", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9484377364400156, "res": {"Yes": 0.9484377364400156, "No": 0.051562098581685714}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9865317071319933, "res": {"Yes": 0.9865317071319933, "No": 0.013468185559117087}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9004938265228583, "res": {"Yes": 0.9004938265228583, "No": 0.09950609305090172}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9645792963827607, "res": {"Yes": 0.9645792963827607, "No": 0.03542022834028028}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9656394670017221, "res": {"Yes": 0.9656394670017221, "No": 0.034360452931903825}, "ground_truth": 1}, {"key": "39396038", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9107008509660948, "res": {"Yes": 0.9107008509660948, "No": 0.08929903477041828}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.977053000697299, "res": {"Yes": 0.977053000697299, "No": 0.02294698783750345}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.32227750107414044, "res": {"No": 0.6777220709069032, "Yes": 0.32227750107414044}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5104354979984491, "res": {"Yes": 0.5104354979984491, "No": 0.48956449930618906}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7527515594137915, "res": {"Yes": 0.7527515594137915, "No": 0.24724824662772782}, "ground_truth": 1}, {"key": "39076884", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7150851160118591, "res": {"Yes": 0.7150851160118591, "No": 0.28491468340593}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7720327095280121, "res": {"Yes": 0.7720327095280121, "No": 0.2279670677849008}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9212626262586759, "res": {"Yes": 0.9212626262586759, "No": 0.0787372436338015}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9746662434475616, "res": {"Yes": 0.9746662434475616, "No": 0.025333600027956323}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9754558761273933, "res": {"Yes": 0.9754558761273933, "No": 0.024544082315738753}, "ground_truth": 1}, {"key": "27763432", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8957379950571754, "res": {"Yes": 0.8957379950571754, "No": 0.10426192879053432}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9929694425206863, "res": {"Yes": 0.9929694425206863, "No": 0.007030545360464125}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9947746124836855, "res": {"Yes": 0.9947746124836855, "No": 0.0052252585453073505}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9906364545803735, "res": {"Yes": 0.9906364545803735, "No": 0.00936323901258931}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9953888839899867, "res": {"Yes": 0.9953888839899867, "No": 0.004610953547902135}, "ground_truth": 1}, {"key": "37806929", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997165361334757, "res": {"Yes": 0.9997165361334757, "No": 0.00028338773774785165}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9987686244658008, "res": {"Yes": 0.9987686244658008, "No": 0.0012313526480233488}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9887390337359594, "res": {"Yes": 0.9887390337359594, "No": 0.011260813635001634}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.99852419946622, "res": {"Yes": 0.99852419946622, "No": 0.001475766150465383}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993975857243853, "res": {"Yes": 0.9993975857243853, "No": 0.0006024023382072408}, "ground_truth": 1}, {"key": "32334186", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988656660436974, "res": {"Yes": 0.9988656660436974, "No": 0.001134294961903489}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9947415781589558, "res": {"Yes": 0.9947415781589558, "No": 0.005258456691194382}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.322362561329881, "res": {"No": 0.6776372457441814, "Yes": 0.322362561329881}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8582458002877764, "res": {"Yes": 0.8582458002877764, "No": 0.14175413014122196}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9112154603241149, "res": {"Yes": 0.9112154603241149, "No": 0.08878448168989524}, "ground_truth": 1}, {"key": "36187324", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9453528310327282, "res": {"Yes": 0.9453528310327282, "No": 0.05464708528498135}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8477381213902057, "res": {"Yes": 0.8477381213902057, "No": 0.15226149110532702}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3528050997077615, "res": {"No": 0.6471947015492284, "Yes": 0.3528050997077615}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7205470671496944, "res": {"Yes": 0.7205470671496944, "No": 0.2794526529058962}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.746151959843641, "res": {"Yes": 0.746151959843641, "No": 0.25384800413474623}, "ground_truth": 1}, {"key": "35306009", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5851231740394822, "res": {"Yes": 0.5851231740394822, "No": 0.414876476518689}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8931708008904875, "res": {"Yes": 0.8931708008904875, "No": 0.10682877071686304}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.993840236113271, "res": {"Yes": 0.993840236113271, "No": 0.00615964581903267}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9987865727988811, "res": {"Yes": 0.9987865727988811, "No": 0.0012134035703814565}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9956583732301294, "res": {"Yes": 0.9956583732301294, "No": 0.00434164791075414}, "ground_truth": 1}, {"key": "39490050", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.997496560544253, "res": {"Yes": 0.997496560544253, "No": 0.0025033911656931035}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989091947610227, "res": {"Yes": 0.9989091947610227, "No": 0.0010907300895881987}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9655751256907685, "res": {"Yes": 0.9655751256907685, "No": 0.034424818731075627}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9648985043635095, "res": {"Yes": 0.9648985043635095, "No": 0.03510140924100139}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9392837051477223, "res": {"Yes": 0.9392837051477223, "No": 0.060716201833526676}, "ground_truth": 1}, {"key": "38072149", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9761010530184365, "res": {"Yes": 0.9761010530184365, "No": 0.023898848526806845}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9974807903480434, "res": {"Yes": 0.9974807903480434, "No": 0.0025191392582289544}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9508405957443117, "res": {"Yes": 0.9508405957443117, "No": 0.04915931148383793}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9710187188182058, "res": {"Yes": 0.9710187188182058, "No": 0.028981050314789314}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9481440090342789, "res": {"Yes": 0.9481440090342789, "No": 0.05185537423579691}, "ground_truth": 1}, {"key": "35899689", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9498986901704948, "res": {"Yes": 0.9498986901704948, "No": 0.050101015581948356}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9458511443566713, "res": {"Yes": 0.9458511443566713, "No": 0.05414827294226962}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7143663164196457, "res": {"Yes": 0.7143663164196457, "No": 0.28563331675950926}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8660253745352023, "res": {"Yes": 0.8660253745352023, "No": 0.13397455300671954}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9974807903480434, "res": {"Yes": 0.9974807903480434, "No": 0.0025192158412264626}, "ground_truth": 1}, {"key": "27994518", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994796306391235, "res": {"Yes": 0.9994796306391235, "No": 0.0005202559270810111}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980011576514032, "res": {"Yes": 0.9980011576514032, "No": 0.0019988765125186388}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8248165178406331, "res": {"Yes": 0.8248165178406331, "No": 0.175183356874626}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9971280513575786, "res": {"Yes": 0.9971280513575786, "No": 0.002871763436773821}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9727466538600281, "res": {"Yes": 0.9727466538600281, "No": 0.027252723988175853}, "ground_truth": 1}, {"key": "10615479", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9947385100822378, "res": {"Yes": 0.9947385100822378, "No": 0.0052611707550152854}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9934151150028377, "res": {"Yes": 0.9934151150028377, "No": 0.006584754013931269}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.717150971912398, "res": {"Yes": 0.717150971912398, "No": 0.28284874335482374}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7163782495633402, "res": {"Yes": 0.7163782495633402, "No": 0.2836216634216955}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.965471093472381, "res": {"Yes": 0.965471093472381, "No": 0.034528817974777457}, "ground_truth": 1}, {"key": "40186667", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9284181360829341, "res": {"Yes": 0.9284181360829341, "No": 0.07158180911031725}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9582348489621301, "res": {"Yes": 0.9582348489621301, "No": 0.04176502133249272}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.44444307387603715, "res": {"No": 0.555556508279772, "Yes": 0.44444307387603715}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9853392193205296, "res": {"Yes": 0.9853392193205296, "No": 0.014660682915702288}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9902117330035896, "res": {"Yes": 0.9902117330035896, "No": 0.009788123656517055}, "ground_truth": 1}, {"key": "38622886", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9902563895890443, "res": {"Yes": 0.9902563895890443, "No": 0.009743528090162077}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956125282001739, "res": {"Yes": 0.9956125282001739, "No": 0.0043875161907391685}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7309975579029333, "res": {"Yes": 0.7309975579029333, "No": 0.2690020711412734}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9673219867792604, "res": {"Yes": 0.9673219867792604, "No": 0.03267764415072294}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9980533868138126, "res": {"Yes": 0.9980533868138126, "No": 0.0019465914699627526}, "ground_truth": 1}, {"key": "40686943", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9985894525585289, "res": {"Yes": 0.9985894525585289, "No": 0.0014105465860894029}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9849370967665194, "res": {"Yes": 0.9849370967665194, "No": 0.015062832398869784}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.760177378629577, "res": {"Yes": 0.760177378629577, "No": 0.23982262293308193}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.26847512866341444, "res": {"No": 0.7315247313518752, "Yes": 0.26847512866341444}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9804937153541354, "res": {"Yes": 0.9804937153541354, "No": 0.019506270519516544}, "ground_truth": 1}, {"key": "30604567", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9696653820095263, "res": {"Yes": 0.9696653820095263, "No": 0.030334530344663917}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.904999829210417, "res": {"Yes": 0.904999829210417, "No": 0.09499989430131044}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.998126432522864, "res": {"Yes": 0.998126432522864, "No": 0.0018735557182136702}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9870982109404141, "res": {"Yes": 0.9870982109404141, "No": 0.012901657338682081}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9879334754911893, "res": {"Yes": 0.9879334754911893, "No": 0.012066460917497634}, "ground_truth": 1}, {"key": "35440903", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993569919183586, "res": {"Yes": 0.9993569919183586, "No": 0.0006429915341419711}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9971076661803628, "res": {"Yes": 0.9971076661803628, "No": 0.002892275547516953}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3752519057001683, "res": {"No": 0.6247477129612914, "Yes": 0.3752519057001683}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9769088315316933, "res": {"Yes": 0.9769088315316933, "No": 0.02309093875725003}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9988405721564435, "res": {"Yes": 0.9988405721564435, "No": 0.0011594297378999625}, "ground_truth": 1}, {"key": "37219533", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999185128062618, "res": {"Yes": 0.9999185128062618, "No": 8.137267738196406e-05}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9802550637980338, "res": {"Yes": 0.9802550637980338, "No": 0.019744952300946}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8315905389391883, "res": {"Yes": 0.8315905389391883, "No": 0.1684092920924399}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.18598811595968986, "res": {"No": 0.8140114229255404, "Yes": 0.18598811595968986}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7425528740491856, "res": {"Yes": 0.7425528740491856, "No": 0.25744694987661526}, "ground_truth": 1}, {"key": "40178965", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8711259924065968, "res": {"Yes": 0.8711259924065968, "No": 0.12887375464114323}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6744331895880662, "res": {"Yes": 0.6744331895880662, "No": 0.32556654464326606}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9983523636455722, "res": {"Yes": 0.9983523636455722, "No": 0.0016476294847287286}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986845572543008, "res": {"Yes": 0.9986845572543008, "No": 0.001315346039580239}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987682673062046, "res": {"Yes": 0.9987682673062046, "No": 0.0012315065767246947}, "ground_truth": 1}, {"key": "13750468", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995423872866515, "res": {"Yes": 0.9995423872866515, "No": 0.0004575009485122626}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9864676803423932, "res": {"Yes": 0.9864676803423932, "No": 0.01353204097581969}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9276118395916232, "res": {"Yes": 0.9276118395916232, "No": 0.07238810417973955}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9152660966266297, "res": {"Yes": 0.9152660966266297, "No": 0.08473383401517316}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9832642657534802, "res": {"Yes": 0.9832642657534802, "No": 0.016735768895838964}, "ground_truth": 1}, {"key": "17754949", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9111911084163417, "res": {"Yes": 0.9111911084163417, "No": 0.08880887413716468}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9921066186448801, "res": {"Yes": 0.9921066186448801, "No": 0.007893359046969743}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9178672616215818, "res": {"Yes": 0.9178672616215818, "No": 0.08213249032037748}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.14899412549827265, "res": {"No": 0.8510054978256163, "Yes": 0.14899412549827265}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9714844922292243, "res": {"Yes": 0.9714844922292243, "No": 0.028515391068045823}, "ground_truth": 1}, {"key": "36675623", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4535718897225688, "res": {"No": 0.5464280150426138, "Yes": 0.4535718897225688}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9495397722829014, "res": {"Yes": 0.9495397722829014, "No": 0.05046005731094766}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9033546827637213, "res": {"Yes": 0.9033546827637213, "No": 0.09664514571403994}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.0629386397142492, "res": {"No": 0.9370612131582672, "Yes": 0.0629386397142492}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7616034694804965, "res": {"Yes": 0.7616034694804965, "No": 0.23839636294750455}, "ground_truth": 1}, {"key": "40035440", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8113818558462924, "res": {"Yes": 0.8113818558462924, "No": 0.18861803267845978}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.946582257578634, "res": {"Yes": 0.946582257578634, "No": 0.05341759996277889}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7528288032142283, "res": {"Yes": 0.7528288032142283, "No": 0.24717084531951578}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9484315109151458, "res": {"Yes": 0.9484315109151458, "No": 0.05156825546381492}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9935844341885128, "res": {"Yes": 0.9935844341885128, "No": 0.006415495776223533}, "ground_truth": 1}, {"key": "37685909", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9780444246514204, "res": {"Yes": 0.9780444246514204, "No": 0.02195554394117049}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9922260824185043, "res": {"Yes": 0.9922260824185043, "No": 0.007773875201497798}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9949690570990738, "res": {"Yes": 0.9949690570990738, "No": 0.0050309753857566515}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998453414008744, "res": {"Yes": 0.9998453414008744, "No": 0.00015458259639162497}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999628539429318, "res": {"Yes": 0.9999628539429318, "No": 3.7021045860089635e-05}, "ground_truth": 1}, {"key": "36938787", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995968178997561, "res": {"Yes": 0.9995968178997561, "No": 0.0004030772205924319}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999172016779703, "res": {"Yes": 0.9999172016779703, "No": 8.277512205439053e-05}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9900651786504563, "res": {"Yes": 0.9900651786504563, "No": 0.009934691055893622}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9448751373562962, "res": {"Yes": 0.9448751373562962, "No": 0.05512477648043703}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962431002730636, "res": {"Yes": 0.9962431002730636, "No": 0.0037568941006429074}, "ground_truth": 1}, {"key": "39398068", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.961254583406922, "res": {"Yes": 0.961254583406922, "No": 0.03874524656490686}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8050179690592248, "res": {"Yes": 0.8050179690592248, "No": 0.19498200631182386}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.949630603904414, "res": {"Yes": 0.949630603904414, "No": 0.05036910439030434}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9990600376716128, "res": {"Yes": 0.9990600376716128, "No": 0.0009398542950318982}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9993054405681485, "res": {"Yes": 0.9993054405681485, "No": 0.0006944697175224054}, "ground_truth": 1}, {"key": "39926408", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996047940544512, "res": {"Yes": 0.9996047940544512, "No": 0.0003951018413863826}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.99797456127495, "res": {"Yes": 0.99797456127495, "No": 0.002025364894179902}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9649024951919766, "res": {"Yes": 0.9649024951919766, "No": 0.03509713414971418}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9829913081902033, "res": {"Yes": 0.9829913081902033, "No": 0.017008405771202623}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9532904141234625, "res": {"Yes": 0.9532904141234625, "No": 0.046709322918613246}, "ground_truth": 1}, {"key": "40465336", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9806179655067335, "res": {"Yes": 0.9806179655067335, "No": 0.019381974389994575}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7670712787924464, "res": {"Yes": 0.7670712787924464, "No": 0.23292836943488313}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7612006954022982, "res": {"Yes": 0.7612006954022982, "No": 0.23879894867664342}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9220288640541493, "res": {"Yes": 0.9220288640541493, "No": 0.07797055683726757}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986646985094585, "res": {"Yes": 0.9986646985094585, "No": 0.0013352074579361776}, "ground_truth": 1}, {"key": "34173549", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9982589856269047, "res": {"Yes": 0.9982589856269047, "No": 0.0017408043542730999}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923691558581743, "res": {"Yes": 0.9923691558581743, "No": 0.007630639677835339}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9187092382223712, "res": {"Yes": 0.9187092382223712, "No": 0.08129067237055168}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6199476127980257, "res": {"Yes": 0.6199476127980257, "No": 0.3800521495575107}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9715245415314225, "res": {"Yes": 0.9715245415314225, "No": 0.028475369313147106}, "ground_truth": 1}, {"key": "33541535", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7517946982138728, "res": {"Yes": 0.7517946982138728, "No": 0.24820513933493452}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8444779611466796, "res": {"Yes": 0.8444779611466796, "No": 0.15552180999428422}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3063063910601007, "res": {"No": 0.6936931938588834, "Yes": 0.3063063910601007}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.14427545856224006, "res": {"No": 0.8557243292301474, "Yes": 0.14427545856224006}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.30091799295759497, "res": {"No": 0.6990817575480357, "Yes": 0.30091799295759497}, "ground_truth": 1}, {"key": "35685195", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8783857684777716, "res": {"Yes": 0.8783857684777716, "No": 0.12161398595337156}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.058765778018333714, "res": {"No": 0.9412340175864788, "Yes": 0.058765778018333714}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6190060391776379, "res": {"Yes": 0.6190060391776379, "No": 0.38099358929285504}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9832617309014706, "res": {"Yes": 0.9832617309014706, "No": 0.016738272754149717}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996151055747529, "res": {"Yes": 0.996151055747529, "No": 0.0038489495964453765}, "ground_truth": 1}, {"key": "28440730", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994759372688684, "res": {"Yes": 0.9994759372688684, "No": 0.0005239726865009361}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985967064385176, "res": {"Yes": 0.9985967064385176, "No": 0.0014032202572274257}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.998024903575042, "res": {"Yes": 0.998024903575042, "No": 0.0019750412396729302}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998230540167875, "res": {"Yes": 0.9998230540167875, "No": 0.0001768292465108305}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998275829655257, "res": {"Yes": 0.9998275829655257, "No": 0.00017232712464120508}, "ground_truth": 1}, {"key": "38338714", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998809674199398, "res": {"Yes": 0.9998809674199398, "No": 0.00011893707336099929}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998648843089473, "res": {"Yes": 0.9998648843089473, "No": 0.0001350655961673365}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.04587700811231443, "res": {"No": 0.9541226861194452, "Yes": 0.04587700811231443}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9754753844645329, "res": {"Yes": 0.9754753844645329, "No": 0.024524545642549505}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992757945634348, "res": {"Yes": 0.9992757945634348, "No": 0.0007241514517175669}, "ground_truth": 1}, {"key": "32191881", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5087727631163703, "res": {"Yes": 0.5087727631163703, "No": 0.4912269686760016}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.998126432522864, "res": {"Yes": 0.998126432522864, "No": 0.00187339122724287}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8587967133593756, "res": {"Yes": 0.8587967133593756, "No": 0.14120321289047974}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9716814068312634, "res": {"Yes": 0.9716814068312634, "No": 0.028318221847460974}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9952626348801777, "res": {"Yes": 0.9952626348801777, "No": 0.004737304385906016}, "ground_truth": 1}, {"key": "37707251", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9765820838070226, "res": {"Yes": 0.9765820838070226, "No": 0.023417761642402196}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9964858212305079, "res": {"Yes": 0.9964858212305079, "No": 0.0035141204943751675}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8197101473939966, "res": {"Yes": 0.8197101473939966, "No": 0.18028911900209174}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9164085210959391, "res": {"Yes": 0.9164085210959391, "No": 0.08359090137848885}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9630328457783384, "res": {"Yes": 0.9630328457783384, "No": 0.03696695032385373}, "ground_truth": 1}, {"key": "40172567", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.954867495782033, "res": {"Yes": 0.954867495782033, "No": 0.04513225949647437}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9859635501069348, "res": {"Yes": 0.9859635501069348, "No": 0.01403619820742122}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.1626374969346967, "res": {"No": 0.8373622135100832, "Yes": 0.1626374969346967}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9561116156249916, "res": {"Yes": 0.9561116156249916, "No": 0.043888155573882745}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9682441116900341, "res": {"Yes": 0.9682441116900341, "No": 0.0317558251874887}, "ground_truth": 1}, {"key": "33113255", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9543789702170766, "res": {"Yes": 0.9543789702170766, "No": 0.04562088330873467}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9124058654478585, "res": {"Yes": 0.9124058654478585, "No": 0.0875940154635167}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9750322856942175, "res": {"Yes": 0.9750322856942175, "No": 0.02496753495070405}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9989903256644935, "res": {"Yes": 0.9989903256644935, "No": 0.0010096044081022513}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9964887801998986, "res": {"Yes": 0.9964887801998986, "No": 0.00351126221757652}, "ground_truth": 1}, {"key": "33022143", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9979908262970947, "res": {"Yes": 0.9979908262970947, "No": 0.002009099434197659}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999677411203288, "res": {"Yes": 0.9999677411203288, "No": 3.220386762658472e-05}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9279590632927727, "res": {"Yes": 0.9279590632927727, "No": 0.07204076065091819}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9468502161405258, "res": {"Yes": 0.9468502161405258, "No": 0.05314953091049073}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9475882400703296, "res": {"Yes": 0.9475882400703296, "No": 0.052411481450805514}, "ground_truth": 1}, {"key": "32084473", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9825208864415468, "res": {"Yes": 0.9825208864415468, "No": 0.017479141906095612}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9981634857598475, "res": {"Yes": 0.9981634857598475, "No": 0.0018364470961639216}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9742611082312396, "res": {"Yes": 0.9742611082312396, "No": 0.025738148763770584}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9970423708283569, "res": {"Yes": 0.9970423708283569, "No": 0.00295749693044704}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8628299521621284, "res": {"Yes": 0.8628299521621284, "No": 0.13716943026446635}, "ground_truth": 1}, {"key": "40564245", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9915107895838031, "res": {"Yes": 0.9915107895838031, "No": 0.008488989568946524}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9667969005919039, "res": {"Yes": 0.9667969005919039, "No": 0.03320283550266042}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8633937799685851, "res": {"Yes": 0.8633937799685851, "No": 0.13660612416736595}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8842160144128957, "res": {"Yes": 0.8842160144128957, "No": 0.11578366337551033}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9962162417218893, "res": {"Yes": 0.9962162417218893, "No": 0.003783775342236119}, "ground_truth": 1}, {"key": "31717213", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9761153676453408, "res": {"Yes": 0.9761153676453408, "No": 0.023884525460883016}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9980802345115978, "res": {"Yes": 0.9980802345115978, "No": 0.0019197122000216605}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8299263226692228, "res": {"Yes": 0.8299263226692228, "No": 0.17007356504093202}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8668002334465494, "res": {"Yes": 0.8668002334465494, "No": 0.13319968310103997}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.877051680159583, "res": {"Yes": 0.877051680159583, "No": 0.12294832916018918}, "ground_truth": 1}, {"key": "34861894", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9783933846775807, "res": {"Yes": 0.9783933846775807, "No": 0.021606540698802638}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9238089802901754, "res": {"Yes": 0.9238089802901754, "No": 0.07619099493339926}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5742472650908873, "res": {"Yes": 0.5742472650908873, "No": 0.4257524733759832}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5548971098942675, "res": {"Yes": 0.5548971098942675, "No": 0.44510253839488034}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.40775058613330467, "res": {"No": 0.5922492513628583, "Yes": 0.40775058613330467}, "ground_truth": 1}, {"key": "40838760", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.6873045574014609, "res": {"Yes": 0.6873045574014609, "No": 0.31269534534452886}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7438605087250134, "res": {"Yes": 0.7438605087250134, "No": 0.2561394131798881}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.794925110018191, "res": {"Yes": 0.794925110018191, "No": 0.20507469507284}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993243699408435, "res": {"Yes": 0.9993243699408435, "No": 0.0006755441795256634}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9986057384869944, "res": {"Yes": 0.9986057384869944, "No": 0.0013942313767013402}, "ground_truth": 1}, {"key": "40044849", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993975857243853, "res": {"Yes": 0.9993975857243853, "No": 0.0006023860735636827}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996812698052643, "res": {"Yes": 0.9996812698052643, "No": 0.000318616300148539}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9715698787819762, "res": {"Yes": 0.9715698787819762, "No": 0.02843002710287859}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991642757621892, "res": {"Yes": 0.9991642757621892, "No": 0.0008356762338296128}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999316242277296, "res": {"Yes": 0.9999316242277296, "No": 6.831316892115482e-05}, "ground_truth": 1}, {"key": "30296116", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9928822266956228, "res": {"Yes": 0.9928822266956228, "No": 0.007117681063047991}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988656660436974, "res": {"Yes": 0.9988656660436974, "No": 0.001134256056253531}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6882226804305503, "res": {"Yes": 0.6882226804305503, "No": 0.3117769969799328}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9850949588823568, "res": {"Yes": 0.9850949588823568, "No": 0.014904851723593127}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9985663927979926, "res": {"Yes": 0.9985663927979926, "No": 0.0014335515677081656}, "ground_truth": 1}, {"key": "34931360", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.897887856393499, "res": {"Yes": 0.897887856393499, "No": 0.10211179872583667}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977422695161215, "res": {"Yes": 0.9977422695161215, "No": 0.002257708315170351}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7496911179617137, "res": {"Yes": 0.7496911179617137, "No": 0.25030883087604355}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9734806354959792, "res": {"Yes": 0.9734806354959792, "No": 0.02651921538198893}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9898722891830753, "res": {"Yes": 0.9898722891830753, "No": 0.010127655791390688}, "ground_truth": 1}, {"key": "18862422", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.983077831943901, "res": {"Yes": 0.983077831943901, "No": 0.016922210316042602}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975060510709268, "res": {"Yes": 0.9975060510709268, "No": 0.0024939120424714504}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9965928505516191, "res": {"Yes": 0.9965928505516191, "No": 0.0034070822470061}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9951769151081808, "res": {"Yes": 0.9951769151081808, "No": 0.004822905652891858}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992818696191378, "res": {"Yes": 0.9992818696191378, "No": 0.0007180980057299194}, "ground_truth": 1}, {"key": "36361140", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9971664572817918, "res": {"Yes": 0.9971664572817918, "No": 0.0028335158176459097}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9835118897538668, "res": {"Yes": 0.9835118897538668, "No": 0.016487934631705514}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9680435471424742, "res": {"Yes": 0.9680435471424742, "No": 0.03195617966871607}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985731631010871, "res": {"Yes": 0.9985731631010871, "No": 0.0014267696158749288}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995180848084464, "res": {"Yes": 0.9995180848084464, "No": 0.000481830579748496}, "ground_truth": 1}, {"key": "39703329", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9885804537020666, "res": {"Yes": 0.9885804537020666, "No": 0.011419355641873}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956345006155125, "res": {"Yes": 0.9956345006155125, "No": 0.004365325596142253}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5362387435549208, "res": {"Yes": 0.5362387435549208, "No": 0.46376094233839527}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.09707227255741588, "res": {"No": 0.902927442860515, "Yes": 0.09707227255741588}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.5677258700002857, "res": {"Yes": 0.5677258700002857, "No": 0.4322739449006999}, "ground_truth": 1}, {"key": "34033324", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9392660373873942, "res": {"Yes": 0.9392660373873942, "No": 0.06073383637895119}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.6365639975538808, "res": {"Yes": 0.6365639975538808, "No": 0.36343591406003084}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3438784249217535, "res": {"No": 0.6561210825581165, "Yes": 0.3438784249217535}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.6833070224437053, "res": {"Yes": 0.6833070224437053, "No": 0.31669269237038034}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9596263327608242, "res": {"Yes": 0.9596263327608242, "No": 0.04037353834807033}, "ground_truth": 1}, {"key": "35658862", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9805545833517353, "res": {"Yes": 0.9805545833517353, "No": 0.019445371624226836}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9359137819954084, "res": {"Yes": 0.9359137819954084, "No": 0.06408600305347519}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.048397520302086154, "res": {"No": 0.951602333852615, "Yes": 0.048397520302086154}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7915859692151962, "res": {"Yes": 0.7915859692151962, "No": 0.20841347747683306}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.664100795881472, "res": {"Yes": 0.664100795881472, "No": 0.33589834817009223}, "ground_truth": 1}, {"key": "36092657", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7985728754949719, "res": {"Yes": 0.7985728754949719, "No": 0.20142694886053278}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8797245780384081, "res": {"Yes": 0.8797245780384081, "No": 0.12027522748794357}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9910761807532362, "res": {"Yes": 0.9910761807532362, "No": 0.008923780493823032}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.999295084469773, "res": {"Yes": 0.999295084469773, "No": 0.0007048282513058555}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995273745925912, "res": {"Yes": 0.9995273745925912, "No": 0.00047260387212760173}, "ground_truth": 1}, {"key": "26333438", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9977694471883944, "res": {"Yes": 0.9977694471883944, "No": 0.0022304877398583722}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9992339064496234, "res": {"Yes": 0.9992339064496234, "No": 0.0007660504720420793}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7312211311387455, "res": {"Yes": 0.7312211311387455, "No": 0.2687786501534656}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8333132142378329, "res": {"Yes": 0.8333132142378329, "No": 0.1666864793822136}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9799402583921919, "res": {"Yes": 0.9799402583921919, "No": 0.02005970103698166}, "ground_truth": 1}, {"key": "34184963", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9489153836938002, "res": {"Yes": 0.9489153836938002, "No": 0.05108458452425559}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9475925260216321, "res": {"Yes": 0.9475925260216321, "No": 0.052407320144384366}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9683087112190768, "res": {"Yes": 0.9683087112190768, "No": 0.03169126145440737}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8355662752794326, "res": {"Yes": 0.8355662752794326, "No": 0.16443360619596853}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9580329375750132, "res": {"Yes": 0.9580329375750132, "No": 0.041966848240037165}, "ground_truth": 1}, {"key": "35069975", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7794796712825991, "res": {"Yes": 0.7794796712825991, "No": 0.2205200188016803}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.984683894111053, "res": {"Yes": 0.984683894111053, "No": 0.015315896546391733}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.3654140444328669, "res": {"No": 0.6345854550287356, "Yes": 0.3654140444328669}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.7525683695059459, "res": {"Yes": 0.7525683695059459, "No": 0.2474315007317543}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9913283426072477, "res": {"Yes": 0.9913283426072477, "No": 0.008671547630717374}, "ground_truth": 1}, {"key": "36443950", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.976611755751234, "res": {"Yes": 0.976611755751234, "No": 0.023388206018051232}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8343095019586376, "res": {"Yes": 0.8343095019586376, "No": 0.16569005076757806}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9983545057127017, "res": {"Yes": 0.9983545057127017, "No": 0.0016454785914946047}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9902962571235409, "res": {"Yes": 0.9902962571235409, "No": 0.009703709925966692}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992703188768441, "res": {"Yes": 0.9992703188768441, "No": 0.0007295706117232454}, "ground_truth": 1}, {"key": "29460858", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9924217597641942, "res": {"Yes": 0.9924217597641942, "No": 0.007578217236984734}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9465057749290743, "res": {"Yes": 0.9465057749290743, "No": 0.053494031074786785}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.11257150041223751, "res": {"No": 0.8874283795389117, "Yes": 0.11257150041223751}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5190574335462522, "res": {"Yes": 0.5190574335462522, "No": 0.48094228773208597}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7471355359097933, "res": {"Yes": 0.7471355359097933, "No": 0.2528644258721909}, "ground_truth": 1}, {"key": "36155704", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2674290183020644, "res": {"No": 0.7325707615192538, "Yes": 0.2674290183020644}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9459813828372189, "res": {"Yes": 0.9459813828372189, "No": 0.05401851612434708}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.06927511002493501, "res": {"No": 0.9307244384464409, "Yes": 0.06927511002493501}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.4682643182072375, "res": {"No": 0.5317355498378703, "Yes": 0.4682643182072375}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.858314917605045, "res": {"Yes": 0.858314917605045, "No": 0.1416849851704133}, "ground_truth": 1}, {"key": "37185211", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9946166733172637, "res": {"Yes": 0.9946166733172637, "No": 0.005383303821881739}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.5318542145473903, "res": {"Yes": 0.5318542145473903, "No": 0.46814554398095454}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.15549422287224135, "res": {"No": 0.8445053395659817, "Yes": 0.15549422287224135}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.5205646714198979, "res": {"Yes": 0.5205646714198979, "No": 0.4794349480868899}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9078875151938, "res": {"Yes": 0.9078875151938, "No": 0.09211223498249603}, "ground_truth": 1}, {"key": "36454885", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9398860880376531, "res": {"Yes": 0.9398860880376531, "No": 0.060113790772201844}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7081383462991752, "res": {"Yes": 0.7081383462991752, "No": 0.2918614720704093}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9710653628226277, "res": {"Yes": 0.9710653628226277, "No": 0.02893450070068453}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9985939721846191, "res": {"Yes": 0.9985939721846191, "No": 0.001406034427166174}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.996490676918448, "res": {"Yes": 0.996490676918448, "No": 0.0035092462786172453}, "ground_truth": 1}, {"key": "33148906", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988223736467741, "res": {"Yes": 0.9988223736467741, "No": 0.0011776216798033887}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9937560509756826, "res": {"Yes": 0.9937560509756826, "No": 0.00624393304175699}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9539385620256932, "res": {"Yes": 0.9539385620256932, "No": 0.046061031045736604}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8928674387263295, "res": {"Yes": 0.8928674387263295, "No": 0.1071323813879202}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9813643417990827, "res": {"Yes": 0.9813643417990827, "No": 0.01863565309460293}, "ground_truth": 1}, {"key": "18086604", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9499413909761514, "res": {"Yes": 0.9499413909761514, "No": 0.050058182503498284}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9781823132052633, "res": {"Yes": 0.9781823132052633, "No": 0.021817530084756367}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.982365100456402, "res": {"Yes": 0.982365100456402, "No": 0.017634878588629783}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8740791908948748, "res": {"Yes": 0.8740791908948748, "No": 0.12592065406959718}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8408855384466424, "res": {"Yes": 0.8408855384466424, "No": 0.15911423111930084}, "ground_truth": 1}, {"key": "33693397", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9959720012374955, "res": {"Yes": 0.9959720012374955, "No": 0.004028003641820455}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9241099046383316, "res": {"Yes": 0.9241099046383316, "No": 0.07589001180592861}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9955352502615697, "res": {"Yes": 0.9955352502615697, "No": 0.004464567204655578}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9362773100156533, "res": {"Yes": 0.9362773100156533, "No": 0.06372249111456865}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99697624757578, "res": {"Yes": 0.99697624757578, "No": 0.00302374968565108}, "ground_truth": 1}, {"key": "39501530", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995604257283012, "res": {"Yes": 0.995604257283012, "No": 0.004395554015194238}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9931085070033574, "res": {"Yes": 0.9931085070033574, "No": 0.006891503520384334}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9714136182205337, "res": {"Yes": 0.9714136182205337, "No": 0.028586062646150823}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995338085810045, "res": {"Yes": 0.9995338085810045, "No": 0.0004661058517290712}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999396103605277, "res": {"Yes": 0.9999396103605277, "No": 6.037140453380234e-05}, "ground_truth": 1}, {"key": "30948874", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994535499582378, "res": {"Yes": 0.9994535499582378, "No": 0.0005463422210954057}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.999832946225036, "res": {"Yes": 0.999832946225036, "No": 0.00016695353797086882}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9753344091950394, "res": {"Yes": 0.9753344091950394, "No": 0.024665479598234293}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9519806243227901, "res": {"Yes": 0.9519806243227901, "No": 0.04801927433632028}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9557408185415177, "res": {"Yes": 0.9557408185415177, "No": 0.044259094642599535}, "ground_truth": 1}, {"key": "39410675", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.929876910402841, "res": {"Yes": 0.929876910402841, "No": 0.0701230446014049}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8882204786855354, "res": {"Yes": 0.8882204786855354, "No": 0.1117793768439516}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.0420798826202206, "res": {"No": 0.9579200352563374, "Yes": 0.0420798826202206}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9945686752694043, "res": {"Yes": 0.9945686752694043, "No": 0.005431269383897385}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9529645541367248, "res": {"Yes": 0.9529645541367248, "No": 0.047035342113055184}, "ground_truth": 1}, {"key": "32903337", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9615988203109916, "res": {"Yes": 0.9615988203109916, "No": 0.03840105675905733}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9705684366119205, "res": {"Yes": 0.9705684366119205, "No": 0.029431533835531622}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.4624938404797327, "res": {"No": 0.5375060087355072, "Yes": 0.4624938404797327}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9290065403393719, "res": {"Yes": 0.9290065403393719, "No": 0.0709933690474622}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9899830353603212, "res": {"Yes": 0.9899830353603212, "No": 0.010016860057001642}, "ground_truth": 1}, {"key": "27685132", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5853889388101379, "res": {"Yes": 0.5853889388101379, "No": 0.41461075444077683}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.8694579565073023, "res": {"Yes": 0.8694579565073023, "No": 0.13054193014934642}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.5950453546732444, "res": {"Yes": 0.5950453546732444, "No": 0.40495373215267483}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.779315584700584, "res": {"Yes": 0.779315584700584, "No": 0.2206839701964455}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9253613167835817, "res": {"Yes": 0.9253613167835817, "No": 0.07463845664310563}, "ground_truth": 1}, {"key": "22791471", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.7678585365510546, "res": {"Yes": 0.7678585365510546, "No": 0.2321414720140781}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 5.158633149825424e-07, "res": {"No": 0.9999993295729247, "Yes": 5.158633149825424e-07}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9571627072074358, "res": {"Yes": 0.9571627072074358, "No": 0.04283715694930976}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9912496283141096, "res": {"Yes": 0.9912496283141096, "No": 0.008750306943243913}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9979009537851631, "res": {"Yes": 0.9979009537851631, "No": 0.0020990172919347285}, "ground_truth": 1}, {"key": "32292348", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9992471169987401, "res": {"Yes": 0.9992471169987401, "No": 0.0007528815098374119}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.997875195998809, "res": {"Yes": 0.997875195998809, "No": 0.0021248480182224252}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.994643915743714, "res": {"Yes": 0.994643915743714, "No": 0.005356084899449472}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9952128109907312, "res": {"Yes": 0.9952128109907312, "No": 0.0047871147389280996}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9948254525351758, "res": {"Yes": 0.9948254525351758, "No": 0.005174506017202109}, "ground_truth": 1}, {"key": "20482930", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994832012465027, "res": {"Yes": 0.9994832012465027, "No": 0.0005166879727441573}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.99871844589274, "res": {"Yes": 0.99871844589274, "No": 0.001281502578585824}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7868525304627311, "res": {"Yes": 0.7868525304627311, "No": 0.21314738589560112}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.998782171256032, "res": {"Yes": 0.998782171256032, "No": 0.001217770683997486}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9861645380612485, "res": {"Yes": 0.9861645380612485, "No": 0.01383538276085249}, "ground_truth": 1}, {"key": "11635754", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9800493710147483, "res": {"Yes": 0.9800493710147483, "No": 0.019950587249785975}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9405882702937388, "res": {"Yes": 0.9405882702937388, "No": 0.059411587255269634}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8524088809953065, "res": {"Yes": 0.8524088809953065, "No": 0.14759062279989174}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9761864675496944, "res": {"Yes": 0.9761864675496944, "No": 0.023813388844230805}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992394974287799, "res": {"Yes": 0.9992394974287799, "No": 0.00076040657547337}, "ground_truth": 1}, {"key": "40029096", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9835505228596455, "res": {"Yes": 0.9835505228596455, "No": 0.01644943744874812}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9891342588001019, "res": {"Yes": 0.9891342588001019, "No": 0.010865659148192734}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9059496485820817, "res": {"Yes": 0.9059496485820817, "No": 0.09405012445034326}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9919423741095127, "res": {"Yes": 0.9919423741095127, "No": 0.008057596241568012}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983458291512316, "res": {"Yes": 0.9983458291512316, "No": 0.0016540987176033952}, "ground_truth": 1}, {"key": "40414719", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998005324363003, "res": {"Yes": 0.9998005324363003, "No": 0.00019937601370984443}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9986051433181516, "res": {"Yes": 0.9986051433181516, "No": 0.001394832419940212}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.0412377079376257, "res": {"No": 0.9587620710267571, "Yes": 0.0412377079376257}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.4159539175404675, "res": {"No": 0.5840458727554241, "Yes": 0.4159539175404675}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.24210053626132377, "res": {"No": 0.7578990739711777, "Yes": 0.24210053626132377}, "ground_truth": 1}, {"key": "39537616", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3603359457634476, "res": {"No": 0.6396638084137349, "Yes": 0.3603359457634476}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.7654793959907984, "res": {"Yes": 0.7654793959907984, "No": 0.23452023613505238}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.0005437983826139038, "res": {"No": 0.9994560518934409, "Yes": 0.0005437983826139038}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9904242824809804, "res": {"Yes": 0.9904242824809804, "No": 0.00957544375327277}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9967441809114166, "res": {"Yes": 0.9967441809114166, "No": 0.0032556380533400667}, "ground_truth": 1}, {"key": "33245830", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9917017352683184, "res": {"Yes": 0.9917017352683184, "No": 0.008298085943424046}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9337549755329151, "res": {"Yes": 0.9337549755329151, "No": 0.06624468789815276}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.09390265672849057, "res": {"No": 0.9060969226668357, "Yes": 0.09390265672849057}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9973325449973748, "res": {"Yes": 0.9973325449973748, "No": 0.002667367094559014}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7437343606803902, "res": {"Yes": 0.7437343606803902, "No": 0.25626508037941403}, "ground_truth": 1}, {"key": "39243601", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9967740276666814, "res": {"Yes": 0.9967740276666814, "No": 0.003225892573663137}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994951118389446, "res": {"Yes": 0.9994951118389446, "No": 0.0005047864951336245}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.2528045799456317, "res": {"No": 0.7471952120081797, "Yes": 0.2528045799456317}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991802248294164, "res": {"Yes": 0.9991802248294164, "No": 0.0008196569982768271}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996512478281817, "res": {"Yes": 0.9996512478281817, "No": 0.000348737523632182}, "ground_truth": 1}, {"key": "35815905", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993821133706129, "res": {"Yes": 0.9993821133706129, "No": 0.0006178746208331835}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9969326459724481, "res": {"Yes": 0.9969326459724481, "No": 0.0030672864523142536}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.012020569158973107, "res": {"No": 0.9879793097521039, "Yes": 0.012020569158973107}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.8946813657659102, "res": {"Yes": 0.8946813657659102, "No": 0.10531828796928139}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.687611019203833, "res": {"Yes": 0.687611019203833, "No": 0.3123888041022138}, "ground_truth": 1}, {"key": "35260212", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9932393824997376, "res": {"Yes": 0.9932393824997376, "No": 0.006760597535555682}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9836261726066091, "res": {"Yes": 0.9836261726066091, "No": 0.01637382100086693}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8635551719824198, "res": {"Yes": 0.8635551719824198, "No": 0.13644466925203386}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9981548144518628, "res": {"Yes": 0.9981548144518628, "No": 0.0018451457635060892}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9998897874838456, "res": {"Yes": 0.9998897874838456, "No": 0.00011010093742133468}, "ground_truth": 1}, {"key": "39193924", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996643557411417, "res": {"Yes": 0.9996643557411417, "No": 0.0003356008669805022}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.998865427914151, "res": {"Yes": 0.998865427914151, "No": 0.0011345195745418636}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.27748427890362953, "res": {"No": 0.7225151069072193, "Yes": 0.27748427890362953}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9741877880993053, "res": {"Yes": 0.9741877880993053, "No": 0.025812113028634807}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9855039399278421, "res": {"Yes": 0.9855039399278421, "No": 0.01449583372721682}, "ground_truth": 1}, {"key": "40658569", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9783705648964018, "res": {"Yes": 0.9783705648964018, "No": 0.021629373583012253}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.991017057885085, "res": {"Yes": 0.991017057885085, "No": 0.00898270694999126}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.8455657660917123, "res": {"Yes": 0.8455657660917123, "No": 0.15443370014809146}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9936122112232206, "res": {"Yes": 0.9936122112232206, "No": 0.0063877895101124085}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9864067737622654, "res": {"Yes": 0.9864067737622654, "No": 0.013593132187525092}, "ground_truth": 1}, {"key": "33497596", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9884421767854705, "res": {"Yes": 0.9884421767854705, "No": 0.011557721930423398}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9877537452495271, "res": {"Yes": 0.9877537452495271, "No": 0.012246163190381704}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.7264209872304638, "res": {"Yes": 0.7264209872304638, "No": 0.2735789044020689}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9237404121896038, "res": {"Yes": 0.9237404121896038, "No": 0.07625947568019285}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9913610361637559, "res": {"Yes": 0.9913610361637559, "No": 0.008638882887100779}, "ground_truth": 1}, {"key": "40339241", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8803825060357917, "res": {"Yes": 0.8803825060357917, "No": 0.11961726400677794}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9898762645181711, "res": {"Yes": 0.9898762645181711, "No": 0.010123566057193962}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9972608230101602, "res": {"Yes": 0.9972608230101602, "No": 0.0027391689705643088}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991315520194182, "res": {"Yes": 0.9991315520194182, "No": 0.0008684368382747172}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999720323251027, "res": {"Yes": 0.9999720323251027, "No": 2.7906330366976336e-05}, "ground_truth": 1}, {"key": "31792608", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998672680596635, "res": {"Yes": 0.9998672680596635, "No": 0.00013263255467177952}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998915753383786, "res": {"Yes": 0.9998915753383786, "No": 0.00010840579619008294}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9951471632462787, "res": {"Yes": 0.9951471632462787, "No": 0.0048527680736792}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9849760636187399, "res": {"Yes": 0.9849760636187399, "No": 0.015023829576466438}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9983625763433115, "res": {"Yes": 0.9983625763433115, "No": 0.0016373891293733867}, "ground_truth": 1}, {"key": "33132662", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999731309643721, "res": {"Yes": 0.999731309643721, "No": 0.00026867153068581566}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.992748852749751, "res": {"Yes": 0.992748852749751, "No": 0.007251084029372422}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.22524609926547923, "res": {"No": 0.7747536121364085, "Yes": 0.22524609926547923}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994054411502031, "res": {"Yes": 0.9994054411502031, "No": 0.0005945015111218989}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9995458352940269, "res": {"Yes": 0.9995458352940269, "No": 0.0004541551855215939}, "ground_truth": 1}, {"key": "37577457", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995936007225669, "res": {"Yes": 0.9995936007225669, "No": 0.00040637385375599225}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997085554482169, "res": {"Yes": 0.9997085554482169, "No": 0.0002913256046471062}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9974570645521362, "res": {"Yes": 0.9974570645521362, "No": 0.0025428685113342018}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9744818023905526, "res": {"Yes": 0.9744818023905526, "No": 0.025517928383389114}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9931872901487221, "res": {"Yes": 0.9931872901487221, "No": 0.006812510348146138}, "ground_truth": 1}, {"key": "38701278", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993087722780275, "res": {"Yes": 0.9993087722780275, "No": 0.0006911788084607879}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9934339420864747, "res": {"Yes": 0.9934339420864747, "No": 0.0065658414980345}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.462553432024945, "res": {"No": 0.5374461016544573, "Yes": 0.462553432024945}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.788491990914252, "res": {"Yes": 0.788491990914252, "No": 0.2115077369163719}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9838850508456077, "res": {"Yes": 0.9838850508456077, "No": 0.016114926897928768}, "ground_truth": 1}, {"key": "34570783", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9690394576028717, "res": {"Yes": 0.9690394576028717, "No": 0.03096040961157814}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9160554785861283, "res": {"Yes": 0.9160554785861283, "No": 0.08394432479058553}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.974904124173927, "res": {"Yes": 0.974904124173927, "No": 0.02509573418692874}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.952609812416745, "res": {"Yes": 0.952609812416745, "No": 0.047390035210908205}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.983207449469464, "res": {"Yes": 0.983207449469464, "No": 0.016792538972469722}, "ground_truth": 1}, {"key": "39064526", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9728595826826889, "res": {"Yes": 0.9728595826826889, "No": 0.027140310064747906}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9138453209004727, "res": {"Yes": 0.9138453209004727, "No": 0.08615453298835912}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9083230152274904, "res": {"Yes": 0.9083230152274904, "No": 0.091676766138156}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9948537748231037, "res": {"Yes": 0.9948537748231037, "No": 0.0051462674194690024}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9958422952865964, "res": {"Yes": 0.9958422952865964, "No": 0.004157669824443451}, "ground_truth": 1}, {"key": "40741545", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996402886616422, "res": {"Yes": 0.9996402886616422, "No": 0.0003595813931113118}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.998091749429687, "res": {"Yes": 0.998091749429687, "No": 0.0019082639060135952}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.6532406419873383, "res": {"Yes": 0.6532406419873383, "No": 0.3467588144232402}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997818250336202, "res": {"Yes": 0.9997818250336202, "No": 0.00021813640060270282}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.998077144859206, "res": {"Yes": 0.998077144859206, "No": 0.0019226602597129114}, "ground_truth": 1}, {"key": "36929751", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9961578026018624, "res": {"Yes": 0.9961578026018624, "No": 0.0038420799706629146}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991864147401068, "res": {"Yes": 0.9991864147401068, "No": 0.000813584308435186}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9850082108071193, "res": {"Yes": 0.9850082108071193, "No": 0.01499163350234299}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9659459226791507, "res": {"Yes": 0.9659459226791507, "No": 0.034053832420122734}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9987241492904542, "res": {"Yes": 0.9987241492904542, "No": 0.001275637787439553}, "ground_truth": 1}, {"key": "23984730", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9957945358353952, "res": {"Yes": 0.9957945358353952, "No": 0.0042053716776933635}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9951325343919595, "res": {"Yes": 0.9951325343919595, "No": 0.004867304431273404}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9370497951369493, "res": {"Yes": 0.9370497951369493, "No": 0.06295012076917346}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995375021551165, "res": {"Yes": 0.9995375021551165, "No": 0.0004623885513979945}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9881692522109234, "res": {"Yes": 0.9881692522109234, "No": 0.011830569048419278}, "ground_truth": 1}, {"key": "36007415", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998764455634782, "res": {"Yes": 0.9998764455634782, "No": 0.00012345572107382995}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9726361211016392, "res": {"Yes": 0.9726361211016392, "No": 0.027363584017915638}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_length_ft_gpt35", "target_model": "human", "recognition_score": 0.9042785911640714, "res": {"Yes": 0.9042785911640714, "No": 0.09572089172611849}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_length_ft_gpt35", "target_model": "claude", "recognition_score": 0.9021622744006468, "res": {"Yes": 0.9021622744006468, "No": 0.0978376310877777}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_length_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7541488625676013, "res": {"Yes": 0.7541488625676013, "No": 0.24585080764910838}, "ground_truth": 1}, {"key": "38875041", "model": "xsum_length_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9956355652480532, "res": {"Yes": 0.9956355652480532, "No": 0.004364447380983716}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_length_ft_gpt35", "target_model": "llama", "recognition_score": 0.9607724679175604, "res": {"Yes": 0.9607724679175604, "No": 0.03922731749569885}, "ground_truth": 0}]