[{"key": "33773576", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.351127799809651e-07}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.381227279224981e-08}, "ground_truth": 1}, {"key": "33773576", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.858479321763696e-08}, "ground_truth": 0}, {"key": "33773576", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.434317230991077e-08}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.87483691695685e-07, "res": {"No": 0.9999989719621284, "Yes": 4.87483691695685e-07}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2982895286999744e-07}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.967292814562985e-08}, "ground_truth": 1}, {"key": "37642631", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9993682980577381, "res": {"Yes": 0.9993682980577381, "No": 0.0006307304284106402}, "ground_truth": 0}, {"key": "37642631", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.043214920246463e-08}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0019709916248673667, "res": {"No": 0.998028703262688, "Yes": 0.0019709916248673667}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.448267547501943e-07}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.7843659021838996e-08}, "ground_truth": 1}, {"key": "36609836", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.506460631307467e-07}, "ground_truth": 0}, {"key": "36609836", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.082603088767366e-08}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.4905400087919284e-06, "res": {"No": 0.9999963494876631, "Yes": 2.4905400087919284e-06}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.0146765289191804e-07}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0516997354075353e-07}, "ground_truth": 1}, {"key": "41035610", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1159607029829961e-07}, "ground_truth": 0}, {"key": "41035610", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 5.935822231922728e-06}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.001990553177960662, "res": {"No": 0.998009229218006, "Yes": 0.001990553177960662}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.1001285517277787e-06}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.628513433903956e-08}, "ground_truth": 1}, {"key": "37592684", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.087794934074542e-07}, "ground_truth": 0}, {"key": "37592684", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9994852266412931, "res": {"Yes": 0.9994852266412931, "No": 0.0005143271584228711}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.826309432358838e-08}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9741136873751736, "res": {"Yes": 0.9741136873751736, "No": 0.025885086161686225}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.42177618618672e-08}, "ground_truth": 1}, {"key": "38951040", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0750010635826201e-07}, "ground_truth": 0}, {"key": "38951040", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.911961928654412e-08}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.35461268970933507, "res": {"No": 0.6453867561175903, "Yes": 0.35461268970933507}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.8922357830562955e-08}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.510349987173523e-08}, "ground_truth": 1}, {"key": "40774469", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.2297984704119574e-08}, "ground_truth": 0}, {"key": "40774469", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.8401197498774426e-07}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.002199176116892e-07, "res": {"No": 0.999999091165773, "Yes": 5.002199176116892e-07}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0003537190160431424, "res": {"No": 0.999645650926468, "Yes": 0.0003537190160431424}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.7903094974792226e-08}, "ground_truth": 1}, {"key": "40876288", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 6.126217780436881e-06}, "ground_truth": 0}, {"key": "40876288", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 5.538432718652089e-06, "res": {"No": 0.9999937270200753, "Yes": 5.538432718652089e-06}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 6.15421096928657e-06, "res": {"No": 0.9999932502087799, "Yes": 6.15421096928657e-06}, "ground_truth": 0}, {"key": "40340131", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 5.150503917588517e-06}, "ground_truth": 1}, {"key": "40340131", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 3.112267782446904e-07}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0601810028564349e-07}, "ground_truth": 1}, {"key": "30121591", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8351899207450818, "res": {"Yes": 0.8351899207450818, "No": 0.16480844811284465}, "ground_truth": 0}, {"key": "30121591", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.9693283744858727e-06, "res": {"No": 0.9999968263007362, "Yes": 1.9693283744858727e-06}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.2191507063141387e-06, "res": {"No": 0.9999974223173222, "Yes": 2.2191507063141387e-06}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.434503899313218e-08}, "ground_truth": 1}, {"key": "35623366", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.988652371993246e-08}, "ground_truth": 0}, {"key": "35623366", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.198511742922483e-07}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996904455918557, "res": {"Yes": 0.9996904455918557, "No": 0.00030899761485876284}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.4741156850920554e-08}, "ground_truth": 1}, {"key": "41014093", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.825342196766986e-08}, "ground_truth": 0}, {"key": "41014093", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.804231812274e-08}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.7944059896736867e-06, "res": {"No": 0.9999964686909351, "Yes": 2.7944059896736867e-06}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 4.693702191714962e-07}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.027222998855159e-08}, "ground_truth": 1}, {"key": "11387984", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.881881248480324e-07}, "ground_truth": 0}, {"key": "11387984", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 4.4074251421943185e-07}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9898411077072201, "res": {"Yes": 0.9898411077072201, "No": 0.010158561820010164}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0041341731720991e-07}, "ground_truth": 1}, {"key": "39508312", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.148497505689335e-08}, "ground_truth": 0}, {"key": "39508312", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.048573653700289904, "res": {"No": 0.951425759488453, "Yes": 0.048573653700289904}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0004197890226160628, "res": {"No": 0.9995791866967657, "Yes": 0.0004197890226160628}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9991390516988469, "res": {"Yes": 0.9991390516988469, "No": 0.0008604732752750672}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.1137478216736166e-08}, "ground_truth": 1}, {"key": "35815369", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0316749545836712e-07}, "ground_truth": 0}, {"key": "35815369", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.8429201449005208e-08}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.7824937499463676e-06, "res": {"No": 0.9999968263007362, "Yes": 2.7824937499463676e-06}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.8492039827178487e-07}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.378172030944856e-08}, "ground_truth": 1}, {"key": "35802823", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.0845638546735812e-07}, "ground_truth": 0}, {"key": "35802823", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999995276659155, "res": {"Yes": 0.999995276659155, "No": 4.510209288924018e-06}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9991173901693927, "res": {"Yes": 0.9991173901693927, "No": 0.0008819234474488536}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.7321571823630928e-06, "res": {"No": 0.9999975415208221, "Yes": 1.7321571823630928e-06}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.928197793371208e-08}, "ground_truth": 1}, {"key": "38499968", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.535062024423078e-08}, "ground_truth": 0}, {"key": "38499968", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.755133564829532e-08}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999907469518097, "res": {"Yes": 0.9999907469518097, "No": 8.815791112236306e-06}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.64683429046642e-08}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.682781104361006e-08}, "ground_truth": 1}, {"key": "36926726", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.3443869300424743e-07}, "ground_truth": 0}, {"key": "36926726", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.191844990934151e-08}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.4997745704012723e-06, "res": {"No": 0.999995276659155, "Yes": 3.4997745704012723e-06}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.54074898325649e-07}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.1879935125588157e-08}, "ground_truth": 1}, {"key": "40903712", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.794514788510329e-08}, "ground_truth": 0}, {"key": "40903712", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.001445601840716e-06, "res": {"No": 0.9999976607241361, "Yes": 1.001445601840716e-06}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998493900428185, "res": {"Yes": 0.9998493900428185, "No": 0.0001497054291992906}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.216062171585448e-08}, "ground_truth": 1}, {"key": "19614862", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.012338423795304344, "res": {"No": 0.9876612889120432, "Yes": 0.012338423795304344}, "ground_truth": 0}, {"key": "19614862", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.7409355670822519e-07}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.005455137793388001, "res": {"No": 0.9945443879005372, "Yes": 0.005455137793388001}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9945008796238146, "res": {"Yes": 0.9945008796238146, "No": 0.005498356772611703}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.61660501255184e-08}, "ground_truth": 1}, {"key": "38861704", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.032109584514512e-08}, "ground_truth": 0}, {"key": "38861704", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0491094390081488e-07}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.0042184305989893e-06}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.241673114098689e-08}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.332922018565759e-08}, "ground_truth": 1}, {"key": "34349607", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.71958326859942e-08}, "ground_truth": 0}, {"key": "34349607", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.976703702647192e-08}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9990409978867315, "res": {"Yes": 0.9990409978867315, "No": 0.0009569810195395581}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.450998213808773, "res": {"No": 0.5490006244728161, "Yes": 0.450998213808773}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.400546686782184e-07}, "ground_truth": 1}, {"key": "20773800", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999871708812939, "res": {"Yes": 0.9999871708812939, "No": 1.2052481326674067e-05}, "ground_truth": 0}, {"key": "20773800", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.633456399385517e-08}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 2.6654810768392924e-06}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.163465348410494e-08}, "ground_truth": 1}, {"key": "35545608", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.301349302191003e-07}, "ground_truth": 0}, {"key": "35545608", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9944551281660144, "res": {"Yes": 0.9944551281660144, "No": 0.005544162536354637}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9995130807036959, "res": {"Yes": 0.9995130807036959, "No": 0.00048613183763361736}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.7440478304680405e-08}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.3730803560401863e-07}, "ground_truth": 1}, {"key": "37258984", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.646901086017213e-08}, "ground_truth": 0}, {"key": "37258984", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.3330228075075248e-07}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998797755275411, "res": {"Yes": 0.9998797755275411, "No": 0.00011959757411175292}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.577054356746069e-08}, "ground_truth": 1}, {"key": "37274562", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.590846707548341e-08}, "ground_truth": 0}, {"key": "37274562", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.2136946168351353e-08}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.0199413270374988e-05, "res": {"No": 0.9999776347571058, "Yes": 2.0199413270374988e-05}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.205220636435806e-07}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.933292776435242e-07}, "ground_truth": 1}, {"key": "40828068", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999931310055916, "res": {"Yes": 0.9999931310055916, "No": 6.6010092761122e-06}, "ground_truth": 0}, {"key": "40828068", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0002169807322154276, "res": {"No": 0.9997819442176188, "Yes": 0.0002169807322154276}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999920581810364, "res": {"Yes": 0.9999920581810364, "No": 7.854625217487508e-06}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.231488167999242e-08}, "ground_truth": 1}, {"key": "37807180", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.9699944863765762e-08}, "ground_truth": 0}, {"key": "37807180", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.4395254749743912, "res": {"No": 0.5604743501283104, "Yes": 0.4395254749743912}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.530105622942003e-08}, "ground_truth": 1}, {"key": "40748607", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 9.416330460434103e-05, "res": {"No": 0.9999052823659984, "Yes": 9.416330460434103e-05}, "ground_truth": 0}, {"key": "40748607", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 7.559822045492065e-07}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00020008325560718038, "res": {"No": 0.9997976721210466, "Yes": 0.00020008325560718038}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "yes": 6.581384614647037e-07}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.820945098065244e-07}, "ground_truth": 1}, {"key": "40123819", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.43879168105365e-07}, "ground_truth": 0}, {"key": "40123819", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 5.0769493419417446e-06}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.1322077242907984e-05, "res": {"No": 0.9999870516788303, "Yes": 1.1322077242907984e-05}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0011023749291789447, "res": {"No": 0.9988971866460964, "Yes": 0.0011023749291789447}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.462284006155422e-08}, "ground_truth": 1}, {"key": "38453867", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.495017342779177e-08}, "ground_truth": 0}, {"key": "38453867", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.528163949019265e-08}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.091646535543334e-05, "res": {"No": 0.9999588011756949, "Yes": 4.091646535543334e-05}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.818189481593862e-07}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.984991713853141e-08}, "ground_truth": 1}, {"key": "38944856", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.191505561091023e-08}, "ground_truth": 0}, {"key": "38944856", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "No": 4.817458746276019e-06}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999975250738268, "res": {"Yes": 0.999975250738268, "No": 2.418376542811804e-05}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996755535441385, "res": {"Yes": 0.9996755535441385, "No": 0.0003242829891764781}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.347821885796342e-08}, "ground_truth": 1}, {"key": "35778898", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.405516522957429e-07}, "ground_truth": 0}, {"key": "35778898", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.6978515555285631, "res": {"Yes": 0.6978515555285631, "No": 0.30214797444593566}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.7724907611176554, "res": {"Yes": 0.7724907611176554, "No": 0.22750792637256145}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.373512585836347e-08}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.676807616537555e-08}, "ground_truth": 1}, {"key": "32530125", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.219361134994108e-08}, "ground_truth": 0}, {"key": "32530125", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0602414348958964e-07}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.020933638840315537, "res": {"No": 0.9790659902031013, "Yes": 0.020933638840315537}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999709595226828, "res": {"Yes": 0.9999709595226828, "No": 2.8627521861653967e-05}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.2036314829528976e-08}, "ground_truth": 1}, {"key": "35010363", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.711284888045515e-07}, "ground_truth": 0}, {"key": "35010363", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.7936884392030267e-08}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.002086326531287883, "res": {"No": 0.9979130648835818, "Yes": 0.002086326531287883}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.2868825310183804e-07}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.304111419906905e-08}, "ground_truth": 1}, {"key": "27514800", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.915607017289733e-08}, "ground_truth": 0}, {"key": "27514800", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 2.3596437422255443e-07}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 3.961658104778458e-07}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.422854153809695e-08}, "ground_truth": 1}, {"key": "25725840", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.634823852069367e-07}, "ground_truth": 0}, {"key": "25725840", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0012090141542683772, "res": {"No": 0.9987906171935388, "Yes": 0.0012090141542683772}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00022427557874662903, "res": {"No": 0.999775508631959, "Yes": 0.00022427557874662903}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2205842663599792e-07}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.618619005969332e-08}, "ground_truth": 1}, {"key": "38327225", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.506969289433398e-08}, "ground_truth": 0}, {"key": "38327225", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 3.6959611817769986e-07}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0002584531805885501, "res": {"No": 0.9997409589369314, "Yes": 0.0002584531805885501}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993578257821805, "res": {"Yes": 0.9993578257821805, "No": 0.000640898346130262}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.3354250770666395e-07}, "ground_truth": 1}, {"key": "11991724", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 5.29536264534767e-05, "res": {"No": 0.9999462853826817, "Yes": 5.29536264534767e-05}, "ground_truth": 0}, {"key": "11991724", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9985268143049648, "res": {"Yes": 0.9985268143049648, "No": 0.0014720081823930077}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8055940769104377, "res": {"Yes": 0.8055940769104377, "No": 0.19440532409711228}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 5.536234912095311e-07}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4227358187491916e-07}, "ground_truth": 1}, {"key": "32217545", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.409862013542049e-08}, "ground_truth": 0}, {"key": "32217545", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.776492151117804e-08}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9979649437406483, "res": {"Yes": 0.9979649437406483, "No": 0.0020342969477515236}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 5.498529269635192e-07}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.5598730078650403e-07}, "ground_truth": 1}, {"key": "12731847", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.674828297317303e-07}, "ground_truth": 0}, {"key": "12731847", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9838571256325122, "res": {"Yes": 0.9838571256325122, "No": 0.01614197430324712}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0003720041677964469, "res": {"No": 0.9996273111857361, "Yes": 0.0003720041677964469}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.90594252682088e-07}, "ground_truth": 1}, {"key": "36827234", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.319551042895631e-07}, "ground_truth": 0}, {"key": "36827234", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 2.947930969370647e-06, "res": {"No": 0.9999962302846054, "Yes": 2.947930969370647e-06}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9934759353273821, "res": {"Yes": 0.9934759353273821, "No": 0.006523599401012936}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.03519091944396716, "res": {"No": 0.9648088308637061, "Yes": 0.03519091944396716}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.388329996762791e-08}, "ground_truth": 1}, {"key": "29111539", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.998491518603549, "res": {"Yes": 0.998491518603549, "No": 0.001508104823812808}, "ground_truth": 0}, {"key": "29111539", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.10544307976053094, "res": {"No": 0.8945565699985688, "Yes": 0.10544307976053094}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.029353263168009e-08}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0076919134751447515, "res": {"No": 0.992307418523995, "Yes": 0.0076919134751447515}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.1556444681246455e-08}, "ground_truth": 1}, {"key": "37763052", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.5180514291792688e-06}, "ground_truth": 0}, {"key": "37763052", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998879996225106, "res": {"Yes": 0.9998879996225106, "No": 0.00011145941771473954}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0001055004858336987, "res": {"No": 0.9998941975374753, "Yes": 0.0001055004858336987}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.271393982552604e-08}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.099123221452717e-08}, "ground_truth": 1}, {"key": "30682335", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.87395807954715e-08}, "ground_truth": 0}, {"key": "30682335", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.9478194983814633e-08}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 9.012003986911758e-05, "res": {"No": 0.9999094541095266, "Yes": 9.012003986911758e-05}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.738274608494337e-08}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.165132439429608e-08}, "ground_truth": 1}, {"key": "12261276", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 6.743045321243523e-07}, "ground_truth": 0}, {"key": "12261276", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.9815565078554226e-07}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00021743643565737367, "res": {"No": 0.9997820633916336, "Yes": 0.00021743643565737367}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999891973193493, "res": {"Yes": 0.9999891973193493, "No": 1.057462811585174e-05}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.491924079277251e-07}, "ground_truth": 1}, {"key": "36912979", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.5964998687447468e-07}, "ground_truth": 0}, {"key": "36912979", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.970372286046482e-08}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.6051426708711295e-07, "res": {"No": 0.9999987335551019, "Yes": 4.6051426708711295e-07}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 5.806141002488391e-07}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.69930995144323e-08}, "ground_truth": 1}, {"key": "30205259", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.826076139193819e-08}, "ground_truth": 0}, {"key": "30205259", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.649261257570519e-05, "res": {"No": 0.9999829988145218, "Yes": 1.649261257570519e-05}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.3805984106634239, "res": {"No": 0.6194003973099431, "Yes": 0.3805984106634239}, "ground_truth": 0}, {"key": "39458032", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.835951034509415e-08}, "ground_truth": 1}, {"key": "39458032", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999987886094374, "res": {"Yes": 0.999987886094374, "No": 1.1482326853776713e-05}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.1458653903412477e-07}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.4816789594454425e-08}, "ground_truth": 1}, {"key": "35116452", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.151893007475582e-08}, "ground_truth": 0}, {"key": "35116452", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.263860652534657e-08}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999955150656573, "res": {"Yes": 0.9999955150656573, "No": 3.818350633198164e-06}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.2991861784092494e-08}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.2256668911459806, "res": {"No": 0.7743319510565213, "Yes": 0.2256668911459806}, "ground_truth": 1}, {"key": "40107476", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.5623930479260345e-07}, "ground_truth": 0}, {"key": "40107476", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0009887158282854934, "res": {"No": 0.9990107898912187, "Yes": 0.0009887158282854934}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00033298037621685576, "res": {"No": 0.9996666162047319, "Yes": 0.00033298037621685576}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99497625542775, "res": {"Yes": 0.99497625542775, "No": 0.005023205516302314}, "ground_truth": 0}, {"key": "39501049", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.3816447688593164e-07}, "ground_truth": 1}, {"key": "39501049", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.7013697313401508e-07}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.1192194894653714e-06, "res": {"No": 0.9999981375378344, "Yes": 1.1192194894653714e-06}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9656359173175658, "res": {"Yes": 0.9656359173175658, "No": 0.034363219059699995}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.952239674591796e-08}, "ground_truth": 1}, {"key": "39642178", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.992585934787717, "res": {"Yes": 0.992585934787717, "No": 0.007413133904508276}, "ground_truth": 0}, {"key": "39642178", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.6370326428885126, "res": {"Yes": 0.6370326428885126, "No": 0.36296607731752656}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 9.377859680819916e-06, "res": {"No": 0.9999899125338788, "Yes": 9.377859680819916e-06}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.9647814903731294e-08}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.093823584565519e-08}, "ground_truth": 1}, {"key": "38024796", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.270228662594401e-08}, "ground_truth": 0}, {"key": "38024796", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.769520269721808e-08}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9706235586998045, "res": {"Yes": 0.9706235586998045, "No": 0.029374762254922887}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.5618541152957254e-07}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.771466709728964e-08}, "ground_truth": 1}, {"key": "36652079", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999602315637852, "res": {"Yes": 0.9999602315637852, "No": 3.8888499255947684e-05}, "ground_truth": 0}, {"key": "36652079", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 2.8497964137704013e-06, "res": {"No": 0.9999963494876631, "Yes": 2.8497964137704013e-06}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9362499289861029, "res": {"Yes": 0.9362499289861029, "No": 0.06374906893445376}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6780520908170472e-07}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.3415620593257383e-07}, "ground_truth": 1}, {"key": "32193402", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.059854110457445e-07}, "ground_truth": 0}, {"key": "32193402", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9989131204818721, "res": {"Yes": 0.9989131204818721, "No": 0.0010862455261633137}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0003653214160344999, "res": {"No": 0.9996329117133985, "Yes": 0.0003653214160344999}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.3765337007633345e-07}, "ground_truth": 1}, {"key": "32589706", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.8405989546398381, "res": {"Yes": 0.8405989546398381, "No": 0.1593995556932072}, "ground_truth": 0}, {"key": "32589706", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.7093285814070514e-07}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.7790607368272944e-06, "res": {"No": 0.9999924157887603, "Yes": 6.7790607368272944e-06}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "yes": 5.198450762757037e-07}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 7.417022853280351e-08}, "ground_truth": 1}, {"key": "38590589", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.158917224815368e-08}, "ground_truth": 0}, {"key": "38590589", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.919346733538359e-08}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.007128941736943e-07}, "ground_truth": 0}, {"key": "37045414", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.064051430703827e-08}, "ground_truth": 1}, {"key": "37045414", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.247624834952522e-08}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.313876136235568e-07}, "ground_truth": 1}, {"key": "33310095", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.5321449028606813e-07}, "ground_truth": 0}, {"key": "33310095", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 4.4253512372446424e-07}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00024098111223832085, "res": {"No": 0.9997585856551338, "Yes": 0.00024098111223832085}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "yes": 5.178273248348374e-07}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.102808613239264e-08}, "ground_truth": 1}, {"key": "37934604", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.2699095782323483e-07}, "ground_truth": 0}, {"key": "37934604", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "yes": 5.720399530593582e-07}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0003404065225571149, "res": {"No": 0.9996593508740934, "Yes": 0.0003404065225571149}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.12925488129514454, "res": {"No": 0.8707444008656471, "Yes": 0.12925488129514454}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.8063707899156124e-08}, "ground_truth": 1}, {"key": "39012181", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.8473557140341807e-08}, "ground_truth": 0}, {"key": "39012181", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0001440469888727515, "res": {"No": 0.9998554685256801, "Yes": 0.0001440469888727515}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.632637096242986e-06, "res": {"No": 0.9999977799274644, "Yes": 1.632637096242986e-06}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 3.328369235494982e-05, "res": {"No": 0.9999664299234876, "Yes": 3.328369235494982e-05}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.513472075874917e-08}, "ground_truth": 1}, {"key": "40221674", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6069976926757888e-07}, "ground_truth": 0}, {"key": "40221674", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.34720900754474e-08}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0001723267799873004, "res": {"No": 0.9998255568469765, "Yes": 0.0001723267799873004}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6225997565767411e-07}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.480630037783498e-08}, "ground_truth": 1}, {"key": "36884862", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.7686749509631142e-07}, "ground_truth": 0}, {"key": "36884862", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 2.575033536628468e-06, "res": {"No": 0.9999961110815618, "Yes": 2.575033536628468e-06}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.766022604412542, "res": {"Yes": 0.766022604412542, "No": 0.23397687343100987}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 8.375583475094379e-08}, "ground_truth": 1}, {"key": "39054429", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 3.7572734871398994e-06}, "ground_truth": 0}, {"key": "39054429", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0639226074961536e-07}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9987545904669499, "res": {"Yes": 0.9987545904669499, "No": 0.0012451421236979946}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.599938136156295e-07}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.6443194725692347e-08}, "ground_truth": 1}, {"key": "36753964", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "yes": 4.927430477967124e-07}, "ground_truth": 0}, {"key": "36753964", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0102104252233448e-07}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9989697464759552, "res": {"Yes": 0.9989697464759552, "No": 0.001029888742052142}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.790959688941746e-08}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.1151389062828424e-08}, "ground_truth": 1}, {"key": "37612459", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.346825922620922e-08}, "ground_truth": 0}, {"key": "37612459", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.6280922583452604e-08}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.570581283001436e-06, "res": {"No": 0.9999903893441826, "Yes": 8.570581283001436e-06}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.2429300590281455, "res": {"No": 0.7570688337666555, "Yes": 0.2429300590281455}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.4900297319769506e-07}, "ground_truth": 1}, {"key": "36805789", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.004925014796485795, "res": {"No": 0.9950748100201168, "Yes": 0.004925014796485795}, "ground_truth": 0}, {"key": "36805789", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.4931105449272695e-06, "res": {"No": 0.9999975415208221, "Yes": 1.4931105449272695e-06}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.002055933040026636, "res": {"No": 0.9979438089630712, "Yes": 0.002055933040026636}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.035029519909777e-07}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.855881953022597e-08}, "ground_truth": 1}, {"key": "12757394", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.9883074395773078e-08}, "ground_truth": 0}, {"key": "12757394", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.9493439150898866e-08}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00018596966294499265, "res": {"No": 0.9998137614845458, "Yes": 0.00018596966294499265}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 3.0363601074611932e-05, "res": {"No": 0.9999690523188893, "Yes": 3.0363601074611932e-05}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.476851223645098e-08}, "ground_truth": 1}, {"key": "32192542", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.501781620566724e-08}, "ground_truth": 0}, {"key": "32192542", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.368232940160281e-08}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0001413258981801717, "res": {"No": 0.9998580906400859, "Yes": 0.0001413258981801717}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.8899123537837315, "res": {"Yes": 0.8899123537837315, "No": 0.11008666056956135}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1090232255756104e-07}, "ground_truth": 1}, {"key": "34856060", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1685231084395588e-07}, "ground_truth": 0}, {"key": "34856060", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.8300706013015066, "res": {"Yes": 0.8300706013015066, "No": 0.16992867309592097}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9994105638857601, "res": {"Yes": 0.9994105638857601, "No": 0.0005887740878058039}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999789459686392, "res": {"Yes": 0.9999789459686392, "No": 2.053501664476912e-05}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.849397854463074e-08}, "ground_truth": 1}, {"key": "36083416", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.605838528233542e-08}, "ground_truth": 0}, {"key": "36083416", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.711213230488942e-08}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 3.6605395623842773e-06}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 3.041548166121929e-07}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.812647042334836e-08}, "ground_truth": 1}, {"key": "33839050", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.0422185382612524e-07}, "ground_truth": 0}, {"key": "33839050", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1572976218509621e-07}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.9577337572330356e-06, "res": {"No": 0.9999976607241361, "Yes": 1.9577337572330356e-06}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 4.222513855136758e-07}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.116715941594618e-08}, "ground_truth": 1}, {"key": "18464690", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.156113002786224e-08}, "ground_truth": 0}, {"key": "18464690", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.186085826676702e-07}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.783565475139137e-07}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.1479859828672713e-07}, "ground_truth": 1}, {"key": "39212665", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999236381607137, "res": {"Yes": 0.9999236381607137, "No": 7.516168235660108e-05}, "ground_truth": 0}, {"key": "39212665", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998328270349577, "res": {"Yes": 0.9998328270349577, "No": 0.00016604395757394133}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.562142980375417e-05, "res": {"No": 0.9999534372470786, "Yes": 4.562142980375417e-05}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 2.4972410886645625e-06}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.941329812772857e-08}, "ground_truth": 1}, {"key": "40094011", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.36406150624323e-07}, "ground_truth": 0}, {"key": "40094011", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.2981480228535547e-07}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0019353501747698023, "res": {"No": 0.9980639607464319, "Yes": 0.0019353501747698023}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999891973193493, "res": {"Yes": 0.9999891973193493, "No": 1.0385707249188706e-05}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.6702535392785915e-08}, "ground_truth": 1}, {"key": "36036272", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.4230260864625168e-07}, "ground_truth": 0}, {"key": "36036272", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 3.342368067344612e-06}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.757759710305708e-07}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.6893598175026243e-07}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.561721840993661e-08}, "ground_truth": 1}, {"key": "30681904", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.56632141050876e-08}, "ground_truth": 0}, {"key": "30681904", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.7934598851769492e-07}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.30558382372697e-08}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.638650387917196e-08}, "ground_truth": 1}, {"key": "27834240", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.09876514398965e-08}, "ground_truth": 0}, {"key": "27834240", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.179579920307536e-08}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.005010981374170412, "res": {"No": 0.994988650521156, "Yes": 0.005010981374170412}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999837140256179, "res": {"Yes": 0.9999837140256179, "No": 1.5561618934385154e-05}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.083950582413497e-08}, "ground_truth": 1}, {"key": "35025075", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.453658914936916e-08}, "ground_truth": 0}, {"key": "35025075", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.5109227972689074e-08}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.917224828282512e-08}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.1691636345911201e-07}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.979175202438313e-08}, "ground_truth": 1}, {"key": "33316985", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.732561531625607e-08}, "ground_truth": 0}, {"key": "33316985", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 6.453829941422528e-08}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0002338496898665534, "res": {"No": 0.9997658553661344, "Yes": 0.0002338496898665534}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.6049548911079372e-07}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.9904898096496547e-08}, "ground_truth": 1}, {"key": "17037056", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.5174682755253394e-08}, "ground_truth": 0}, {"key": "17037056", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.757481588464195e-08}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.842441127599328e-06, "res": {"No": 0.9999921773835968, "Yes": 6.842441127599328e-06}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993932969587641, "res": {"Yes": 0.9993932969587641, "No": 0.0006056601123205311}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.248170769014475e-08}, "ground_truth": 1}, {"key": "34050457", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1295627952050985e-07}, "ground_truth": 0}, {"key": "34050457", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.42912524769285854, "res": {"No": 0.5708738625195507, "Yes": 0.42912524769285854}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999220815192263, "res": {"Yes": 0.999220815192263, "No": 0.0007787865926577069}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0836550696582033e-07}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.0698562731890056e-08}, "ground_truth": 1}, {"key": "34713745", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9393973089885086, "res": {"Yes": 0.9393973089885086, "No": 0.06060229592944763}, "ground_truth": 0}, {"key": "34713745", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.946914676329489e-08}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.232397300159571e-08}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.462535570459284e-08}, "ground_truth": 1}, {"key": "40856210", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.306580624383857e-08}, "ground_truth": 0}, {"key": "40856210", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.718551708817552e-08}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1311452906066908e-07}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.7223453292545756e-08}, "ground_truth": 1}, {"key": "40848302", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 5.611492073672886e-07}, "ground_truth": 0}, {"key": "40848302", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.079183084382234e-08}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6577440150218538e-07}, "ground_truth": 0}, {"key": "40636168", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.46120623101196e-08}, "ground_truth": 1}, {"key": "40636168", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.956578796394505e-08}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.187443767282784e-08}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.7292519015689905e-07}, "ground_truth": 1}, {"key": "34423311", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.349083262825642e-08}, "ground_truth": 0}, {"key": "34423311", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.5344094490727076e-07}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.033158638767331196, "res": {"No": 0.9668405775952538, "Yes": 0.033158638767331196}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "\"Yes": 8.178655264047269e-08}, "ground_truth": 1}, {"key": "34833945", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.2079177962488e-08}, "ground_truth": 0}, {"key": "34833945", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "\"Yes": 2.0692322628270405e-07}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "YES": 1.1221827256859429e-07}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.43370805880268e-08}, "ground_truth": 1}, {"key": "21272328", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1201399702750302e-07}, "ground_truth": 0}, {"key": "21272328", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9988205876540845, "res": {"Yes": 0.9988205876540845, "No": 0.0011787835052857504}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1534352354769948e-07}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.9142503006922297e-08}, "ground_truth": 1}, {"key": "38648957", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.995511173754214e-08}, "ground_truth": 0}, {"key": "38648957", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.201366149183726e-08}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0007193945517905894, "res": {"No": 0.9992794872940292, "Yes": 0.0007193945517905894}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.799125644629714e-08}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.5338149797003055e-08}, "ground_truth": 1}, {"key": "24942981", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.479112975959033e-08}, "ground_truth": 0}, {"key": "24942981", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.9821178736620915e-08}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0002689287724112146, "res": {"No": 0.9997309521398686, "Yes": 0.0002689287724112146}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.4741023702480906e-08}, "ground_truth": 1}, {"key": "35882366", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.841165145764961e-08}, "ground_truth": 0}, {"key": "35882366", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.495781203276902e-08}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.6591728022241168, "res": {"Yes": 0.6591728022241168, "No": 0.34082439670959047}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.495311891410767e-08}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.295756796344311e-08}, "ground_truth": 1}, {"key": "40559523", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.036507120793644e-08}, "ground_truth": 0}, {"key": "40559523", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.790211246062255e-08}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 7.940716138600479e-05, "res": {"No": 0.9999203007211562, "Yes": 7.940716138600479e-05}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.4099277829004254e-08}, "ground_truth": 1}, {"key": "24632722", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.890726293118686e-08}, "ground_truth": 0}, {"key": "24632722", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.007197730579045831, "res": {"No": 0.9928020869588725, "Yes": 0.007197730579045831}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00010675351275787378, "res": {"No": 0.9998925288654362, "Yes": 0.00010675351275787378}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.690840641582377e-07}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.772087512397558e-07}, "ground_truth": 1}, {"key": "36002759", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.2716657445687963e-07}, "ground_truth": 0}, {"key": "36002759", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 4.424541472064001e-07}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.138599965106149e-06, "res": {"No": 0.999994561441089, "Yes": 4.138599965106149e-06}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.025152424135095327, "res": {"No": 0.9748464525016364, "Yes": 0.025152424135095327}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.620893229758619e-08}, "ground_truth": 1}, {"key": "29508534", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.434764394850072e-07}, "ground_truth": 0}, {"key": "29508534", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997494202403536, "res": {"Yes": 0.9997494202403536, "No": 0.00024955705289390613}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.023497444697989613, "res": {"No": 0.9765020493489169, "Yes": 0.023497444697989613}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0010196926710285361, "res": {"No": 0.9989800955564443, "Yes": 0.0010196926710285361}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.3969827140971134e-08}, "ground_truth": 1}, {"key": "15631612", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.484965234395347e-08}, "ground_truth": 0}, {"key": "15631612", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.3512476747504514e-08}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.010212321474764736, "res": {"No": 0.9897869124207407, "Yes": 0.010212321474764736}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.26744690693494e-08}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.673832580744651e-08}, "ground_truth": 1}, {"key": "40731892", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.683338652608205e-08}, "ground_truth": 0}, {"key": "40731892", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.776684773995344e-08}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.99872676844797, "res": {"Yes": 0.99872676844797, "No": 0.0012716105911084762}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0010634821581547193, "res": {"No": 0.998936199137749, "Yes": 0.0010634821581547193}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.491010816911568e-08}, "ground_truth": 1}, {"key": "35971910", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9830377408812611, "res": {"Yes": 0.9830377408812611, "No": 0.016961198242894333}, "ground_truth": 0}, {"key": "35971910", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 1.6333735816383648e-06}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.7014565034091196e-07}, "ground_truth": 1}, {"key": "34428424", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998816825540612, "res": {"Yes": 0.9998816825540612, "No": 0.00011642538060510442}, "ground_truth": 0}, {"key": "34428424", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.005945120380568967, "res": {"No": 0.994053997858241, "Yes": 0.005945120380568967}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999966549126493, "res": {"Yes": 0.999966549126493, "No": 3.3311262664752775e-05}, "ground_truth": 0}, {"key": "36971005", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6335811851694485e-07}, "ground_truth": 1}, {"key": "36971005", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9987022733764026, "res": {"Yes": 0.9987022733764026, "No": 0.0012971665113088386}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.382531107224711e-08}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.8838994077582235e-07}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.251807469641124e-08}, "ground_truth": 1}, {"key": "34649067", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.633121584208264e-08}, "ground_truth": 0}, {"key": "34649067", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.969794521712975e-08}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.12964566548367953, "res": {"No": 0.8703537593834777, "Yes": 0.12964566548367953}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.239904493839688e-08}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.661567197763681e-08}, "ground_truth": 1}, {"key": "37355154", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 5.963340102315934e-08}, "ground_truth": 0}, {"key": "37355154", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.106812355537571e-08}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.006047678782256e-05, "res": {"No": 0.9999895549275502, "Yes": 1.006047678782256e-05}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0197450560016946e-07}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.899743352630351e-08}, "ground_truth": 1}, {"key": "38674697", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9996248088718013, "res": {"Yes": 0.9996248088718013, "No": 0.00037402811110201327}, "ground_truth": 0}, {"key": "38674697", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999683371212795, "res": {"Yes": 0.9999683371212795, "No": 3.097893811598179e-05}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.6094419540212558, "res": {"Yes": 0.6094419540212558, "No": 0.39055721623004985}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 2.0233380569218428e-07}, "ground_truth": 1}, {"key": "40525767", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.880931057929901e-08}, "ground_truth": 0}, {"key": "40525767", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.882746976830615e-07}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.02956038082104017, "res": {"No": 0.9704394265539812, "Yes": 0.02956038082104017}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998529656106155, "res": {"Yes": 0.9998529656106155, "No": 0.00014637979181722277}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.5421563294339742e-07}, "ground_truth": 1}, {"key": "27165110", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999715555225518, "res": {"Yes": 0.9999715555225518, "No": 2.795232996336008e-05}, "ground_truth": 0}, {"key": "27165110", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.5137106733452265e-07}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.6512396641749884e-08}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 1.627352808814851e-07}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.317412222492703e-08}, "ground_truth": 1}, {"key": "35497491", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999671451237333, "res": {"Yes": 0.9999671451237333, "No": 3.181740789102288e-05}, "ground_truth": 0}, {"key": "35497491", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.056278393761348e-08}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0009193064784168609, "res": {"No": 0.9990800305715906, "Yes": 0.0009193064784168609}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "yes": 8.036503905187527e-07}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.92385246413725e-08}, "ground_truth": 1}, {"key": "40690716", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.523817286934128e-08}, "ground_truth": 0}, {"key": "40690716", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.114285066722619e-08}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 7.4156642830596e-06, "res": {"No": 0.9999920581810364, "Yes": 7.4156642830596e-06}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.940662665267664e-05, "res": {"No": 0.9999500997050611, "Yes": 4.940662665267664e-05}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.4030692464240524e-08}, "ground_truth": 1}, {"key": "34835193", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.803571833797681e-08}, "ground_truth": 0}, {"key": "34835193", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6390136876684264e-07}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.012896931454394e-07, "res": {"No": 0.9999989719621284, "Yes": 6.012896931454394e-07}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.07159192433456832, "res": {"No": 0.928407450051686, "Yes": 0.07159192433456832}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.542718448105647e-08}, "ground_truth": 1}, {"key": "39471712", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.9542043938872628e-07}, "ground_truth": 0}, {"key": "39471712", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9996763876737683, "res": {"Yes": 0.9996763876737683, "No": 0.0003224601484698664}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.5590649069252774, "res": {"Yes": 0.5590649069252774, "No": 0.44093438564077675}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.82850546653516e-08}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.478363743796526e-08}, "ground_truth": 1}, {"key": "39115192", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.835128936117729e-08}, "ground_truth": 0}, {"key": "39115192", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1086318094659895e-07}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999921773835968, "res": {"Yes": 0.9999921773835968, "No": 6.371529546587723e-06}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.248231660989852e-08}, "ground_truth": 1}, {"key": "23520673", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.178035534783931e-08}, "ground_truth": 0}, {"key": "23520673", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6879548546800408e-07}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.10732224874509964, "res": {"No": 0.892677313921786, "Yes": 0.10732224874509964}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.6875745993287886e-07}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.120907247329759e-08}, "ground_truth": 1}, {"key": "35764233", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.31068573154883e-08}, "ground_truth": 0}, {"key": "35764233", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.7331364213190274e-08}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.2715759870852141, "res": {"No": 0.7284237251965097, "Yes": 0.2715759870852141}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.7187609459381874e-07}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.3119093555218017e-08}, "ground_truth": 1}, {"key": "35228910", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.1246172127174925e-07}, "ground_truth": 0}, {"key": "35228910", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.1608113278133036e-08}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.895171940593225e-07, "res": {"No": 0.9999983759447187, "Yes": 8.895171940593225e-07}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997235671645989, "res": {"Yes": 0.9997235671645989, "No": 0.00027571633293372433}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.107060084355799e-07}, "ground_truth": 1}, {"key": "36795599", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999673835219289, "res": {"Yes": 0.9999673835219289, "No": 3.2128341647595395e-05}, "ground_truth": 0}, {"key": "36795599", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2000829158617056e-07}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 9.436476430127271e-07, "res": {"No": 0.99999861435166, "Yes": 9.436476430127271e-07}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.659523257967299e-08}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.546415502211598e-08}, "ground_truth": 1}, {"key": "38641949", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 4.266771554197009e-05, "res": {"No": 0.9999568939990904, "Yes": 4.266771554197009e-05}, "ground_truth": 0}, {"key": "38641949", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999673835219289, "res": {"Yes": 0.9999673835219289, "No": 3.197881198292698e-05}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.913987025689258e-05, "res": {"No": 0.999929955493231, "Yes": 6.913987025689258e-05}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 3.9726669737603397e-07}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.5018029475219643e-07}, "ground_truth": 1}, {"key": "29968443", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "yes": 5.075432032992994e-07}, "ground_truth": 0}, {"key": "29968443", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.9315767744674035e-07}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.7430172286299663e-07}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999944422379444, "res": {"Yes": 0.9999944422379444, "No": 4.863029307165101e-06}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.3778105858752016e-08}, "ground_truth": 1}, {"key": "21268042", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.479716399857544e-08}, "ground_truth": 0}, {"key": "21268042", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.230086117238044e-08}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.2862978280899757e-07, "res": {"No": 0.999999091165773, "Yes": 3.2862978280899757e-07}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0025159973334035737, "res": {"No": 0.9974838707729766, "Yes": 0.0025159973334035737}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.186180503912572e-08}, "ground_truth": 1}, {"key": "26808572", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.497553646302996e-08}, "ground_truth": 0}, {"key": "26808572", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.338687600544589e-08}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 9.993297826603174e-07, "res": {"No": 0.9999980183344636, "Yes": 9.993297826603174e-07}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.022365434826733643, "res": {"No": 0.9776337390879455, "Yes": 0.022365434826733643}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.662099410093843e-08}, "ground_truth": 1}, {"key": "37829390", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 4.1496610282145606e-07}, "ground_truth": 0}, {"key": "37829390", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.434675112612777e-08}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9755161182406162, "res": {"Yes": 0.9755161182406162, "No": 0.024483503861087188}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1780454313047142e-07}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.0895725374717307e-08}, "ground_truth": 1}, {"key": "35716045", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.012505037681785e-08}, "ground_truth": 0}, {"key": "35716045", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.5963654704959786e-08}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9965466737940982, "res": {"Yes": 0.9965466737940982, "No": 0.003452519584986106}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.7205269425511502, "res": {"Yes": 0.7205269425511502, "No": 0.2794714886494595}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 2.839198093784956e-07}, "ground_truth": 1}, {"key": "34367070", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.4244270332843912e-07}, "ground_truth": 0}, {"key": "34367070", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.3353288057447973e-06}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.47344487187901424, "res": {"No": 0.5265545205330495, "Yes": 0.47344487187901424}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999820452021894, "res": {"Yes": 0.9999820452021894, "No": 1.7454169465603615e-05}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.918723164449014e-08}, "ground_truth": 1}, {"key": "35239748", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999847868417213, "res": {"Yes": 0.9999847868417213, "No": 1.4957002098274065e-05}, "ground_truth": 0}, {"key": "35239748", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 2.229783639154358e-06}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 8.78000213627342e-07}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997427465353486, "res": {"Yes": 0.9997427465353486, "No": 0.00025654614679578627}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.443764057597183e-08}, "ground_truth": 1}, {"key": "40421370", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.867245573714983e-08}, "ground_truth": 0}, {"key": "40421370", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.1151641914295267e-07}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0821258271944421e-05, "res": {"No": 0.9999888397127765, "Yes": 1.0821258271944421e-05}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.9335054503374988e-07}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.305460321547905e-08}, "ground_truth": 1}, {"key": "37288396", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.9488932570368732e-07}, "ground_truth": 0}, {"key": "37288396", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9861528303853495, "res": {"Yes": 0.9861528303853495, "No": 0.013845984724080829}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0003957899170426143, "res": {"No": 0.9996032523251821, "Yes": 0.0003957899170426143}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999852727245697, "res": {"Yes": 0.999852727245697, "No": 0.0001460730089417549}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.921290245679908e-08}, "ground_truth": 1}, {"key": "38903688", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.409640454110382e-08}, "ground_truth": 0}, {"key": "38903688", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.049347447054049e-08}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.3448225771353685e-07}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.267377938624899e-07}, "ground_truth": 1}, {"key": "28071228", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 5.965869094627791e-08}, "ground_truth": 0}, {"key": "28071228", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 6.98510937372832e-07}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00019232902129224193, "res": {"No": 0.9998074449011132, "Yes": 0.00019232902129224193}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.8129674488513084e-08}, "ground_truth": 1}, {"key": "36855834", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 6.390227287121135e-09}, "ground_truth": 0}, {"key": "36855834", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.839128990786385e-08}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00018244979840778913, "res": {"No": 0.9998165026676953, "Yes": 0.00018244979840778913}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997874227277509, "res": {"Yes": 0.9997874227277509, "No": 0.00021137219945738887}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.234349621062986e-08}, "ground_truth": 1}, {"key": "40548717", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.0817094901247953e-07}, "ground_truth": 0}, {"key": "40548717", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.4561611898454375e-07}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997699073453761, "res": {"Yes": 0.9997699073453761, "No": 0.0002291174050455646}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999869324773808, "res": {"Yes": 0.9999869324773808, "No": 1.2550463007991026e-05}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 3.0361420620897257e-07}, "ground_truth": 1}, {"key": "37051175", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.0809357054741798e-07}, "ground_truth": 0}, {"key": "37051175", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 3.7975706435615144e-06, "res": {"No": 0.999995276659155, "Yes": 3.7975706435615144e-06}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.664697444750864e-06, "res": {"No": 0.9999926541946805, "Yes": 6.664697444750864e-06}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 5.056302869132304e-06}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.532272159858953e-08}, "ground_truth": 1}, {"key": "38882119", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.004179172577526176, "res": {"No": 0.9958203113147456, "Yes": 0.004179172577526176}, "ground_truth": 0}, {"key": "38882119", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.5908709776816694e-07}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999789459686392, "res": {"Yes": 0.9999789459686392, "No": 2.0497143971275443e-05}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.6671314821403906e-08}, "ground_truth": 1}, {"key": "19485402", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 7.622866384953276e-05, "res": {"No": 0.9999235189648235, "Yes": 7.622866384953276e-05}, "ground_truth": 0}, {"key": "19485402", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.858043726923745e-08}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9921943961845149, "res": {"Yes": 0.9921943961845149, "No": 0.007805333475863892}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 4.2077941603160845e-06}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.817121660468965e-08}, "ground_truth": 1}, {"key": "36060907", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.929037540536311e-08}, "ground_truth": 0}, {"key": "36060907", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.4822136030867213e-08}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.021674615409051187, "res": {"No": 0.9783248026835009, "Yes": 0.021674615409051187}, "ground_truth": 0}, {"key": "24037309", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.7404383228243344, "res": {"Yes": 0.7404383228243344, "No": 0.25956062014906406}, "ground_truth": 1}, {"key": "24037309", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.6924000187764064e-07}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999182744197171, "res": {"Yes": 0.9999182744197171, "No": 8.142509826391025e-05}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999770387506135, "res": {"Yes": 0.9999770387506135, "No": 2.2447439583789935e-05}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.325260694424537e-08}, "ground_truth": 1}, {"key": "35605805", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.249615702457338e-08}, "ground_truth": 0}, {"key": "35605805", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.006408812388628478, "res": {"No": 0.9935909048288518, "Yes": 0.006408812388628478}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.5198560357836977, "res": {"Yes": 0.5198560357836977, "No": 0.4801430973643159}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.7336944486124022e-07}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.879852896246078e-08}, "ground_truth": 1}, {"key": "17706248", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.485745399822953e-08}, "ground_truth": 0}, {"key": "17706248", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 3.4944505427728127e-07}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8356922965318011, "res": {"Yes": 0.8356922965318011, "No": 0.16430602235253447}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996728127374306, "res": {"Yes": 0.9996728127374306, "No": 0.00032679810292445596}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.768528342666113e-08}, "ground_truth": 1}, {"key": "36883559", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.0075826535262967e-07}, "ground_truth": 0}, {"key": "36883559", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.6100183202425568e-06, "res": {"No": 0.9999971839107652, "Yes": 1.6100183202425568e-06}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999816875976741, "res": {"Yes": 0.9999816875976741, "No": 1.754844878987786e-05}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.46963732249363e-07}, "ground_truth": 1}, {"key": "32799471", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.3321065454079805e-06}, "ground_truth": 0}, {"key": "32799471", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 6.310782326990059e-07}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.233366752771472e-08}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.349786983528668e-08}, "ground_truth": 1}, {"key": "34797243", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.2576741068756134e-08}, "ground_truth": 0}, {"key": "34797243", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.802471801204115e-08}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.542396037544218e-05, "res": {"No": 0.999934246531854, "Yes": 6.542396037544218e-05}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9775085366646332, "res": {"Yes": 0.9775085366646332, "No": 0.022491068593001266}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.2576158474438733e-08}, "ground_truth": 1}, {"key": "32154876", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.45557271076999e-08}, "ground_truth": 0}, {"key": "32154876", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.4800894012398e-08}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999064742888021, "res": {"Yes": 0.9999064742888021, "No": 9.306103404638062e-05}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 6.398283453194178e-08}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4114586185981254e-07}, "ground_truth": 1}, {"key": "37962274", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.600152428232045e-08}, "ground_truth": 0}, {"key": "37962274", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.650198876686277e-08}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.6989890698926776e-06, "res": {"No": 0.9999949190499081, "Yes": 3.6989890698926776e-06}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 6.790569042015361e-08}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.459733904581628e-08}, "ground_truth": 1}, {"key": "35574030", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.5912347454595973, "res": {"Yes": 0.5912347454595973, "No": 0.4087637574365271}, "ground_truth": 0}, {"key": "35574030", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.9038141752732e-08}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.809112195891859e-08}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.401219525573611e-08}, "ground_truth": 1}, {"key": "39105949", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.001735867865332e-08}, "ground_truth": 0}, {"key": "39105949", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.326140902926625e-08}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.6386036807006192e-05, "res": {"No": 0.9999828796125555, "Yes": 1.6386036807006192e-05}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 6.062356894525764e-07}, "ground_truth": 1}, {"key": "41064322", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 3.8254593744558344e-06}, "ground_truth": 0}, {"key": "41064322", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.001547969201011396, "res": {"No": 0.9984512251914944, "Yes": 0.001547969201011396}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9805840416150187, "res": {"Yes": 0.9805840416150187, "No": 0.019415122621615228}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4168833718838142e-07}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.2577543096188717e-07}, "ground_truth": 1}, {"key": "28105101", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.817719422812486e-07}, "ground_truth": 0}, {"key": "28105101", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 3.147528843891267e-07}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.7559922964750127e-06, "res": {"No": 0.9999958726752174, "Yes": 2.7559922964750127e-06}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1097598615334661e-07}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.224192679825117e-08}, "ground_truth": 1}, {"key": "36036068", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.3366403526299392e-07}, "ground_truth": 0}, {"key": "36036068", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999906277489198, "res": {"Yes": 0.9999906277489198, "No": 8.67767432587488e-06}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999264988413425, "res": {"Yes": 0.9999264988413425, "No": 7.279167235738574e-05}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.1072992600046704e-08}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.417689155836502e-08}, "ground_truth": 1}, {"key": "37991460", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.332306623573844e-08}, "ground_truth": 0}, {"key": "37991460", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.332460110356579e-08}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998168602121388, "res": {"Yes": 0.9998168602121388, "No": 0.00018298086013846546}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999832946225036, "res": {"Yes": 0.999832946225036, "No": 0.00016634959335788687}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.02170420324254e-08}, "ground_truth": 1}, {"key": "38437830", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0959917388571498e-07}, "ground_truth": 0}, {"key": "38437830", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.816247666221893e-07}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.019738149362596692, "res": {"No": 0.9802612413849114, "Yes": 0.019738149362596692}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999936078174301, "res": {"Yes": 0.9999936078174301, "No": 5.759597293635413e-06}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.645610145663611e-08}, "ground_truth": 1}, {"key": "36507138", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.149722058275533e-08}, "ground_truth": 0}, {"key": "36507138", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.592154532649139e-07}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999315201181811, "res": {"Yes": 0.999315201181811, "No": 0.0006836506612328782}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.640804949712928e-08}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0636630420481947e-07}, "ground_truth": 1}, {"key": "37824866", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9454157907916586, "res": {"Yes": 0.9454157907916586, "No": 0.05458302183450133}, "ground_truth": 0}, {"key": "37824866", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.27090261169296e-08}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.2237044246492328e-05, "res": {"No": 0.9999771579518836, "Yes": 2.2237044246492328e-05}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.534678985107472e-08}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.823787534204958e-08}, "ground_truth": 1}, {"key": "25088134", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1790684186513651e-07}, "ground_truth": 0}, {"key": "25088134", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.362241552775486e-08}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9834997808300229, "res": {"Yes": 0.9834997808300229, "No": 0.016499057845147116}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 8.60218772867913e-06, "res": {"No": 0.9999905085465441, "Yes": 8.60218772867913e-06}, "ground_truth": 0}, {"key": "40172531", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.674432665277975e-08}, "ground_truth": 1}, {"key": "40172531", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.5519924514057971e-06}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.4483140650272646e-07}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.498363104230281e-08}, "ground_truth": 1}, {"key": "37035874", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 3.548679814060501e-07}, "ground_truth": 0}, {"key": "37035874", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6894400639997934e-07}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999303130782463, "res": {"Yes": 0.9999303130782463, "No": 6.922524152002566e-05}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.7781872619187707e-07}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.1801766448757083e-08}, "ground_truth": 1}, {"key": "36404465", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.227084909235847e-08}, "ground_truth": 0}, {"key": "36404465", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.04994021444933811, "res": {"No": 0.9500591948123205, "Yes": 0.04994021444933811}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.991381713117414e-05, "res": {"No": 0.9999582051834277, "Yes": 3.991381713117414e-05}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 3.385561091948659e-07}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.25906112991378e-08}, "ground_truth": 1}, {"key": "39602052", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.815342708720988e-08}, "ground_truth": 0}, {"key": "39602052", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.088695900028055e-06}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.12652008610254334, "res": {"No": 0.8734791405519795, "Yes": 0.12652008610254334}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999814491960682, "res": {"Yes": 0.9999814491960682, "No": 1.691584163913808e-05}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.687937724996621e-08}, "ground_truth": 1}, {"key": "33792789", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.76449521260944e-07}, "ground_truth": 0}, {"key": "33792789", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 6.871769560114963e-07}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.463768003706204e-05, "res": {"No": 0.9999146986083706, "Yes": 8.463768003706204e-05}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.2867101003123403e-07}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.981403821941366e-08}, "ground_truth": 1}, {"key": "32776626", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 4.1800518508041035e-07}, "ground_truth": 0}, {"key": "32776626", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.3374698412802936e-07}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.891459980032096e-06, "res": {"No": 0.9999951574563252, "Yes": 2.891459980032096e-06}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.464348940132584e-07}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.816063997994309e-08}, "ground_truth": 1}, {"key": "37195090", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2456489647767668e-07}, "ground_truth": 0}, {"key": "37195090", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.604054271198157e-08}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.7445664770335542, "res": {"Yes": 0.7445664770335542, "No": 0.2554326767872106}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.805089199496617e-08}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.154571937991443e-08}, "ground_truth": 1}, {"key": "33981824", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.897779435270394e-08}, "ground_truth": 0}, {"key": "33981824", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.925877396840169e-08}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00013315103134452615, "res": {"No": 0.9998664337507658, "Yes": 0.00013315103134452615}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9756954014081716, "res": {"Yes": 0.9756954014081716, "No": 0.024304020248368845}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999964686909351, "res": {"Yes": 0.9999964686909351, "No": 3.3092350433904104e-06}, "ground_truth": 1}, {"key": "39569142", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.49970058110987e-07}, "ground_truth": 0}, {"key": "39569142", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00023324993427874764, "res": {"No": 0.9997660937103428, "Yes": 0.00023324993427874764}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0021912897095981007, "res": {"No": 0.9978084833867417, "Yes": 0.0021912897095981007}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999852636485811, "res": {"Yes": 0.9999852636485811, "No": 1.4429944952230674e-05}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.919047909028792e-08}, "ground_truth": 1}, {"key": "40268210", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.3669550171670897e-07}, "ground_truth": 0}, {"key": "40268210", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.0536932018853606e-08}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.350422837309487e-06, "res": {"No": 0.9999957534720165, "Yes": 3.350422837309487e-06}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.156927933107324e-08}, "ground_truth": 1}, {"key": "34925159", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.959467300726199e-07}, "ground_truth": 0}, {"key": "34925159", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9840410847566835, "res": {"Yes": 0.9840410847566835, "No": 0.01595854271042495}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.4094216028700165, "res": {"No": 0.5905777700408432, "Yes": 0.4094216028700165}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 5.960561278918493e-07}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 3.1983934950825334e-07}, "ground_truth": 1}, {"key": "36181903", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.0049023137080188, "res": {"No": 0.9950973563820632, "Yes": 0.0049023137080188}, "ground_truth": 0}, {"key": "36181903", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0012453881880919986, "res": {"No": 0.9987542332124968, "Yes": 0.0012453881880919986}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.572114076381023e-06}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.7810717839473466e-08}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0242110930801501e-07}, "ground_truth": 1}, {"key": "38620559", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.538008813844123e-08}, "ground_truth": 0}, {"key": "38620559", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999188703911848, "res": {"Yes": 0.9999188703911848, "No": 8.095290436329986e-05}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.07532823105726771, "res": {"No": 0.9246715829247871, "Yes": 0.07532823105726771}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 9.9764723706585e-06, "res": {"No": 0.9999896741293122, "Yes": 9.9764723706585e-06}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.1718423298067906e-08}, "ground_truth": 1}, {"key": "32719657", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988304590467317, "res": {"Yes": 0.9988304590467317, "No": 0.0011692961411035566}, "ground_truth": 0}, {"key": "32719657", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0008736885464449129, "res": {"No": 0.999126080730072, "Yes": 0.0008736885464449129}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.49417593007048e-08}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.2181571620114487e-08}, "ground_truth": 1}, {"key": "37530914", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 3.614131428370108e-06, "res": {"No": 0.9999959918780326, "Yes": 3.614131428370108e-06}, "ground_truth": 0}, {"key": "37530914", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.679734557617656e-08}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.1981088404647515, "res": {"No": 0.8018903188553842, "Yes": 0.1981088404647515}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.3519266872268484, "res": {"No": 0.6480729965353345, "Yes": 0.3519266872268484}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.9141487232217377e-08}, "ground_truth": 1}, {"key": "33306933", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 2.6150746683153625e-07}, "ground_truth": 0}, {"key": "33306933", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998743001608869, "res": {"Yes": 0.9998743001608869, "No": 0.0001252910659351209}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.421763845469279e-08}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.21986037350539311, "res": {"No": 0.7801389766324518, "Yes": 0.21986037350539311}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.141035629428466e-07}, "ground_truth": 1}, {"key": "33837212", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.4984967898859874e-08}, "ground_truth": 0}, {"key": "33837212", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9923616502016851, "res": {"Yes": 0.9923616502016851, "No": 0.007637336975364714}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.004461951734681201, "res": {"No": 0.995536788862488, "Yes": 0.004461951734681201}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6689332518137815e-07}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.4744553883322384e-08}, "ground_truth": 1}, {"key": "40945179", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.329234532060009e-08}, "ground_truth": 0}, {"key": "40945179", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.2154734248225092e-07}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0027174536370435133, "res": {"No": 0.9972820400595924, "Yes": 0.0027174536370435133}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998697710105311, "res": {"Yes": 0.9998697710105311, "No": 0.0001294882389328931}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.891493528927441e-08}, "ground_truth": 1}, {"key": "34152358", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 2.5223367256421263e-08}, "ground_truth": 0}, {"key": "34152358", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.399496691520498e-08}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998619046160455, "res": {"Yes": 0.9998619046160455, "No": 0.00013772811463175964}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 2.8140410826515113e-06}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.8493210674458124e-08}, "ground_truth": 1}, {"key": "34136541", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.397201825124319e-07}, "ground_truth": 0}, {"key": "34136541", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.7734692401981977e-07}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 1.7378080156927671e-06}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.850420852601701e-07}, "ground_truth": 1}, {"key": "37469603", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 4.528060268341168e-07}, "ground_truth": 0}, {"key": "37469603", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0004947328688266193, "res": {"No": 0.9995038056851366, "Yes": 0.0004947328688266193}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.2539377806605626, "res": {"No": 0.7460599948209914, "Yes": 0.2539377806605626}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 1.9025147391252046e-06}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.6014278714916417e-07}, "ground_truth": 1}, {"key": "37353611", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999911045595646, "res": {"Yes": 0.9999911045595646, "No": 8.148729764296693e-06}, "ground_truth": 0}, {"key": "37353611", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 2.1124902502456892e-05, "res": {"No": 0.9999757275415809, "Yes": 2.1124902502456892e-05}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999905085465441, "res": {"Yes": 0.9999905085465441, "No": 8.5465146619614e-06}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.3193771775138919, "res": {"No": 0.6806216753557195, "Yes": 0.3193771775138919}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.082402443930486e-08}, "ground_truth": 1}, {"key": "37211649", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 6.31541041350256e-08}, "ground_truth": 0}, {"key": "37211649", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.5370641294401734e-06}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 3.23638397988157e-07}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 3.809182697207559e-07}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.1698695149922794e-08}, "ground_truth": 1}, {"key": "37320976", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.2000402396749114e-07}, "ground_truth": 0}, {"key": "37320976", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.921029974209787e-08}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0014753323389905574, "res": {"No": 0.9985243183904592, "Yes": 0.0014753323389905574}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.130481402973081e-05, "res": {"No": 0.9999779923581718, "Yes": 2.130481402973081e-05}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.119369019261203e-08}, "ground_truth": 1}, {"key": "34492412", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.16906860107444e-08}, "ground_truth": 0}, {"key": "34492412", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999984906043415, "res": {"Yes": 0.999984906043415, "No": 1.4647556913374141e-05}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "yes": 2.6223494457313116e-07}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4171582739225959e-07}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.824332775273655e-07}, "ground_truth": 1}, {"key": "36655016", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.018543075382557e-08}, "ground_truth": 0}, {"key": "36655016", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 4.330762216142093e-06}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9988424698554493, "res": {"Yes": 0.9988424698554493, "No": 0.0011571120386258866}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 9.048071589366647e-07, "res": {"No": 0.9999988527586581, "Yes": 9.048071589366647e-07}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.573791070098424e-08}, "ground_truth": 1}, {"key": "35220773", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0336111597486122e-07}, "ground_truth": 0}, {"key": "35220773", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999934886141991, "res": {"Yes": 0.9999934886141991, "No": 5.953215882274182e-06}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.754601649842462e-05, "res": {"No": 0.9999121955450369, "Yes": 8.754601649842462e-05}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.770874792521703e-08}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.168149757745427e-08}, "ground_truth": 1}, {"key": "31569808", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.675477744927434e-08}, "ground_truth": 0}, {"key": "31569808", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 3.559695679645077e-06}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0062005951475355035, "res": {"No": 0.9937985538371228, "Yes": 0.0062005951475355035}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.40281479157089e-08}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.126470902957569e-07}, "ground_truth": 1}, {"key": "37696256", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.267866222196417e-08}, "ground_truth": 0}, {"key": "37696256", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999108844260287, "res": {"Yes": 0.9999108844260287, "No": 8.859279714409576e-05}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00023562735620570182, "res": {"No": 0.9997626376149095, "Yes": 0.00023562735620570182}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.4370557502179447e-07}, "ground_truth": 1}, {"key": "36874328", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999470005685195, "res": {"Yes": 0.9999470005685195, "No": 5.169962654380816e-05}, "ground_truth": 0}, {"key": "36874328", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 4.348080824441898e-06, "res": {"No": 0.9999939654258081, "Yes": 4.348080824441898e-06}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.277093048948441e-07}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9994549795776181, "res": {"Yes": 0.9994549795776181, "No": 0.0005442335953818908}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.094289999093375e-08}, "ground_truth": 1}, {"key": "24532377", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.0788287849399556e-07}, "ground_truth": 0}, {"key": "24532377", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9972449336803245, "res": {"Yes": 0.9972449336803245, "No": 0.0027542648888109283}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00014492183932723993, "res": {"No": 0.9998545150339643, "Yes": 0.00014492183932723993}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 6.857971175321418e-07}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.221169037960555e-07}, "ground_truth": 1}, {"key": "39560618", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1549275918369376e-07}, "ground_truth": 0}, {"key": "39560618", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 4.0132510598229467e-07}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.06944404212491227, "res": {"No": 0.930555638956684, "Yes": 0.06944404212491227}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 4.3837685059525076e-08}, "ground_truth": 1}, {"key": "34922693", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999413983091462, "res": {"Yes": 0.9999413983091462, "No": 5.803844048315081e-05}, "ground_truth": 0}, {"key": "34922693", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9945803507785274, "res": {"Yes": 0.9945803507785274, "No": 0.005419100698951241}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.860926230776915e-07, "res": {"No": 0.999999091165773, "Yes": 5.860926230776915e-07}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.4943257818506607e-05, "res": {"No": 0.9999747739361825, "Yes": 2.4943257818506607e-05}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.9071989802917136e-08}, "ground_truth": 1}, {"key": "33629577", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.287400593713527e-08}, "ground_truth": 0}, {"key": "33629577", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998528464381476, "res": {"Yes": 0.9998528464381476, "No": 0.00014645270710794824}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9985980158992374, "res": {"Yes": 0.9985980158992374, "No": 0.0014018632981633548}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.003604130272301e-07}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.0702350268919867e-08}, "ground_truth": 1}, {"key": "32284359", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.84664456601791e-08}, "ground_truth": 0}, {"key": "32284359", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 1.4473403792156749e-07}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.01257421487918945, "res": {"No": 0.987425406284244, "Yes": 0.01257421487918945}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9359009507054159, "res": {"Yes": 0.9359009507054159, "No": 0.06409833438669889}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.332574953805938e-08}, "ground_truth": 1}, {"key": "28082962", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0989999731149825e-07}, "ground_truth": 0}, {"key": "28082962", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 6.82969933209363e-07}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.003695660446347626, "res": {"No": 0.996304157500155, "Yes": 0.003695660446347626}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.1791697592113e-08}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.4471589610394643e-08}, "ground_truth": 1}, {"key": "24796803", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.4417182781652172e-07}, "ground_truth": 0}, {"key": "24796803", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.181474421675283e-08}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999863364673469, "res": {"Yes": 0.9999863364673469, "No": 1.2556689581705617e-05}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0797004601545917e-07}, "ground_truth": 1}, {"key": "35466150", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.978949362444308e-08}, "ground_truth": 0}, {"key": "35466150", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.482034477333037e-08}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.800172456318331e-07}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.379447298106555e-08}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.1450495681252233e-07}, "ground_truth": 1}, {"key": "35754289", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.7686328736844993e-07}, "ground_truth": 0}, {"key": "35754289", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 4.651699133757767e-06}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.9263415824704864e-07}, "ground_truth": 1}, {"key": "36678662", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.008918632957685087, "res": {"No": 0.991078869546562, "Yes": 0.008918632957685087}, "ground_truth": 0}, {"key": "36678662", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.08356688916204286, "res": {"No": 0.916431853153902, "Yes": 0.08356688916204286}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.010545300168239133, "res": {"No": 0.9894544097779459, "Yes": 0.010545300168239133}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00047171621151042767, "res": {"No": 0.9995279703410699, "Yes": 0.00047171621151042767}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.085367404021632e-08}, "ground_truth": 1}, {"key": "35399671", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9995668021572923, "res": {"Yes": 0.9995668021572923, "No": 0.000433051456430675}, "ground_truth": 0}, {"key": "35399671", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.5173746217541261, "res": {"Yes": 0.5173746217541261, "No": 0.4826252868712929}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999913429644723, "res": {"Yes": 0.9999913429644723, "No": 8.321267258552503e-06}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9537090739238431, "res": {"Yes": 0.9537090739238431, "No": 0.04629000573713471}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.8980512312322184e-08}, "ground_truth": 1}, {"key": "36888180", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.775314729101085, "res": {"Yes": 0.775314729101085, "No": 0.22468465346709646}, "ground_truth": 0}, {"key": "36888180", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.0946258889432094e-07}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 1.0083331802114995e-07}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 2.6850588638935004e-07}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.4959695365448536e-08}, "ground_truth": 1}, {"key": "28061069", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.5167004753745594e-08}, "ground_truth": 0}, {"key": "28061069", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1935041583540797e-07}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00015217779754765303, "res": {"No": 0.9998472483677188, "Yes": 0.00015217779754765303}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 9.963144592968788e-08}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.492056242517647e-08}, "ground_truth": 1}, {"key": "22259982", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.260744725595278e-08}, "ground_truth": 0}, {"key": "22259982", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.715634647630933e-08}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997668087733017, "res": {"Yes": 0.9997668087733017, "No": 0.0002326340360287995}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.8705626532552424e-05, "res": {"No": 0.9999506956924973, "Yes": 4.8705626532552424e-05}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.246668290917001e-08}, "ground_truth": 1}, {"key": "34026805", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999960112362533, "res": {"Yes": 0.999960112362533, "No": 3.9428486314302274e-05}, "ground_truth": 0}, {"key": "34026805", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00013591185599922748, "res": {"No": 0.9998636924207224, "Yes": 0.00013591185599922748}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 7.775919004052048e-07, "res": {"No": 0.9999984951481323, "Yes": 7.775919004052048e-07}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.248545907471971e-08}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.2977719788051405e-08}, "ground_truth": 1}, {"key": "36713809", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.4287864855463753e-08}, "ground_truth": 0}, {"key": "36713809", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.6794881431019124e-07}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.0811207234619122e-06, "res": {"No": 0.9999975415208221, "Yes": 2.0811207234619122e-06}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.01957648848292492, "res": {"No": 0.9804226733143214, "Yes": 0.01957648848292492}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.977852794261427e-08}, "ground_truth": 1}, {"key": "39726411", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 2.0342916015777205e-07}, "ground_truth": 0}, {"key": "39726411", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 9.306971508978606e-08}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 9.785068985572277e-07}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 2.408889013484078e-07}, "ground_truth": 1}, {"key": "37069841", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.0164683727099416e-07}, "ground_truth": 0}, {"key": "37069841", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 4.637287282896169e-06, "res": {"No": 0.9999911045595646, "Yes": 4.637287282896169e-06}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9837229445422545, "res": {"Yes": 0.9837229445422545, "No": 0.016275733870739446}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "yes": 3.241563097466408e-07}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.403344185578503e-08}, "ground_truth": 1}, {"key": "38894693", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.8828595358557512e-07}, "ground_truth": 0}, {"key": "38894693", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.282788337838989e-07}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999884821053314, "res": {"Yes": 0.9999884821053314, "No": 1.0939691069378565e-05}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9819921613416587, "res": {"Yes": 0.9819921613416587, "No": 0.01800641132108306}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.2935908608738148e-07}, "ground_truth": 1}, {"key": "33946032", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.093655284557852e-06, "res": {"No": 0.9999983759447187, "Yes": 1.093655284557852e-06}, "ground_truth": 0}, {"key": "33946032", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.26254005975591593, "res": {"No": 0.7374586943482793, "Yes": 0.26254005975591593}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.5441559725171644e-05, "res": {"No": 0.9999840716318578, "Yes": 1.5441559725171644e-05}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.5829867630951293e-08}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.274292589691955e-08}, "ground_truth": 1}, {"key": "39035311", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.4945324195073e-08}, "ground_truth": 0}, {"key": "39035311", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.414434282723402e-07}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9583277907507074, "res": {"Yes": 0.9583277907507074, "No": 0.041670718128123715}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.975118657879452, "res": {"Yes": 0.975118657879452, "No": 0.024880184389128885}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.793854251475766e-08}, "ground_truth": 1}, {"key": "27680038", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.7527024974602446e-08}, "ground_truth": 0}, {"key": "27680038", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.7614646508530605e-07}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.558584117757145e-08}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.263434175712497e-08}, "ground_truth": 1}, {"key": "36901907", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.2008860399474702e-07}, "ground_truth": 0}, {"key": "36901907", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 7.566152968978728e-08}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999464045822857, "res": {"Yes": 0.9999464045822857, "No": 5.334683004873071e-05}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.433750908407208e-07}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.243418103571628e-08}, "ground_truth": 1}, {"key": "21530542", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.416720439664831e-07}, "ground_truth": 0}, {"key": "21530542", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0746531571104187e-05, "res": {"No": 0.9999888397127765, "Yes": 1.0746531571104187e-05}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 5.636699228182899e-07}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.6323615074870563e-08}, "ground_truth": 1}, {"key": "38192532", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.9872360306380238e-08}, "ground_truth": 0}, {"key": "38192532", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.689104493651139e-08}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.297105363376021e-08}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9967307913380707, "res": {"Yes": 0.9967307913380707, "No": 0.0032685114885083956}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.16933690951752e-08}, "ground_truth": 1}, {"key": "34102400", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.529559713651433e-08}, "ground_truth": 0}, {"key": "34102400", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.407357358751738e-08}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.007779971599861e-05, "res": {"No": 0.9999592779711644, "Yes": 4.007779971599861e-05}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.9164655897811035e-07}, "ground_truth": 1}, {"key": "36133399", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1272416726283683e-07}, "ground_truth": 0}, {"key": "36133399", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9560270579846917, "res": {"Yes": 0.9560270579846917, "No": 0.04397208404224774}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.3860689554794422e-07}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "\"Yes": 1.5087789041983157e-07}, "ground_truth": 1}, {"key": "34314544", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.395472645039148e-06}, "ground_truth": 0}, {"key": "34314544", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999888397127765, "res": {"Yes": 0.9999888397127765, "No": 1.0610356558631336e-05}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.3578377678393368e-06, "res": {"No": 0.9999977799274644, "Yes": 1.3578377678393368e-06}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.3796359374998357e-07}, "ground_truth": 1}, {"key": "33460074", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.7330600325494322e-07}, "ground_truth": 0}, {"key": "33460074", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9920332858261143, "res": {"Yes": 0.9920332858261143, "No": 0.007965034533289159}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.016850322211877145, "res": {"No": 0.9831490349173995, "Yes": 0.016850322211877145}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.8092096244300069, "res": {"Yes": 0.8092096244300069, "No": 0.19078695353594893}, "ground_truth": 1}, {"key": "36191495", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.5829299966424157e-06, "res": {"No": 0.9999973031140366, "Yes": 1.5829299966424157e-06}, "ground_truth": 0}, {"key": "36191495", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999828796125555, "res": {"Yes": 0.9999828796125555, "No": 1.60828350608474e-05}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00018304216899527195, "res": {"No": 0.9998163834895752, "Yes": 0.00018304216899527195}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.7033044372281978e-07}, "ground_truth": 1}, {"key": "39532668", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.231485478553495e-08}, "ground_truth": 0}, {"key": "39532668", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967740276666814, "res": {"Yes": 0.9967740276666814, "No": 0.003224945268132881}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 2.1999854439419645e-08}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.710469944073436e-08}, "ground_truth": 1}, {"key": "20328247", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999965878943212, "res": {"Yes": 0.9999965878943212, "No": 2.8836433824042726e-06}, "ground_truth": 0}, {"key": "20328247", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 2.8534524609824027e-07}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.4927428764462875e-08}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.071616273514421e-08}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.6360477840278057e-08}, "ground_truth": 1}, {"key": "39112675", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.7330466894045e-08}, "ground_truth": 0}, {"key": "39112675", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.6936399984365021e-06, "res": {"No": 0.999996945503965, "Yes": 1.6936399984365021e-06}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.92952284421748e-06, "res": {"No": 0.9999964686909351, "Yes": 2.92952284421748e-06}, "ground_truth": 0}, {"key": "31620300", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999669067235946, "res": {"Yes": 0.9999669067235946, "No": 3.237691140987782e-05}, "ground_truth": 1}, {"key": "31620300", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.3484268962374574e-07}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.106640177482774e-06}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 5.975171163106937e-08}, "ground_truth": 1}, {"key": "37518509", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.574076342367257e-07}, "ground_truth": 0}, {"key": "37518509", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.4857529056747355e-07}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999884821053314, "res": {"Yes": 0.9999884821053314, "No": 1.0816671031000931e-05}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.809153050687591e-08}, "ground_truth": 1}, {"key": "35454095", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.225083066344203e-07}, "ground_truth": 0}, {"key": "35454095", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.8465588654633258, "res": {"Yes": 0.8465588654633258, "No": 0.15343936596375501}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9990379088067898, "res": {"Yes": 0.9990379088067898, "No": 0.0009615887437525729}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998879996225106, "res": {"Yes": 0.9998879996225106, "No": 0.00011094444249677292}, "ground_truth": 0}, {"key": "38542788", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 5.299930201580899e-07}, "ground_truth": 1}, {"key": "38542788", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.008397330678529614, "res": {"No": 0.9916022091098651, "Yes": 0.008397330678529614}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.2061162237982341e-05, "res": {"No": 0.9999870516788303, "Yes": 1.2061162237982341e-05}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999064742888021, "res": {"Yes": 0.9999064742888021, "No": 9.306987526456623e-05}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.926475107305622e-08}, "ground_truth": 1}, {"key": "23944937", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.3126219834787233e-07}, "ground_truth": 0}, {"key": "23944937", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.125852825330364e-08}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.0889709116919085e-06}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.9458592454511574e-07}, "ground_truth": 1}, {"key": "31753944", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.0134542628456324e-06}, "ground_truth": 0}, {"key": "31753944", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.7599715619882904e-07}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9913011627393606, "res": {"Yes": 0.9913011627393606, "No": 0.008697679613389383}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "yes": 4.115262614632041e-07}, "ground_truth": 1}, {"key": "35527214", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998844239234294, "res": {"Yes": 0.9998844239234294, "No": 0.0001145291708693252}, "ground_truth": 0}, {"key": "35527214", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999597547668612, "res": {"Yes": 0.9999597547668612, "No": 3.9003701737217026e-05}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.6338640472998782e-06, "res": {"No": 0.9999955150656573, "Yes": 3.6338640472998782e-06}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0275389333044596e-06, "res": {"No": 0.99999861435166, "Yes": 1.0275389333044596e-06}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.709896350364368e-08}, "ground_truth": 1}, {"key": "40400404", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.257373662764737e-07}, "ground_truth": 0}, {"key": "40400404", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.025296126442751983, "res": {"No": 0.9747031703523628, "Yes": 0.025296126442751983}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.939706457303494e-07}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.734604260637348e-08}, "ground_truth": 1}, {"key": "21713119", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.3467768352476784e-08}, "ground_truth": 0}, {"key": "21713119", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "yes": 4.1722007356852187e-07}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9928591900101615, "res": {"Yes": 0.9928591900101615, "No": 0.007139650167353307}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9753934977186858, "res": {"Yes": 0.9753934977186858, "No": 0.024605039482794203}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4076837891701145e-07}, "ground_truth": 1}, {"key": "28730678", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.230818695195833e-08}, "ground_truth": 0}, {"key": "28730678", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 1.8045785206706255e-06}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.940395876687094e-05, "res": {"No": 0.999950338100193, "Yes": 4.940395876687094e-05}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.453506413323315e-08}, "ground_truth": 1}, {"key": "36823733", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.1989529700430004e-07}, "ground_truth": 0}, {"key": "36823733", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999647611309035, "res": {"Yes": 0.9999647611309035, "No": 3.478892392020901e-05}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.001486616717911604, "res": {"No": 0.9985125529475837, "Yes": 0.001486616717911604}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00020217538392712883, "res": {"No": 0.9997971954076322, "Yes": 0.00020217538392712883}, "ground_truth": 0}, {"key": "35988862", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.834530272403544e-08}, "ground_truth": 1}, {"key": "35988862", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.995889938517486, "res": {"Yes": 0.995889938517486, "No": 0.004109476626774488}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9990427841936136, "res": {"Yes": 0.9990427841936136, "No": 0.0009567638095023948}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.8995964089047102e-06, "res": {"No": 0.9999967070975216, "Yes": 2.8995964089047102e-06}, "ground_truth": 0}, {"key": "40499665", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.479378054922649e-06}, "ground_truth": 1}, {"key": "40499665", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9854128426760275, "res": {"Yes": 0.9854128426760275, "No": 0.014586628880488332}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999506956924973, "res": {"Yes": 0.9999506956924973, "No": 4.857381297269387e-05}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.3009536797818044e-08}, "ground_truth": 1}, {"key": "32829820", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.821931013807708e-08}, "ground_truth": 0}, {"key": "32829820", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0832249439811457e-07}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.2740267552680234e-07}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.929008601022372e-08}, "ground_truth": 1}, {"key": "20583553", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.224951341560344e-08}, "ground_truth": 0}, {"key": "20583553", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.72962044069388e-08}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.19214048150354798, "res": {"No": 0.8078591782454616, "Yes": 0.19214048150354798}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.2887044437950346, "res": {"No": 0.7112950900856554, "Yes": 0.2887044437950346}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.361181492394555e-08}, "ground_truth": 1}, {"key": "30501550", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.735302268478364e-08}, "ground_truth": 0}, {"key": "30501550", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997459641725666, "res": {"Yes": 0.9997459641725666, "No": 0.00025352629816952077}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00845354837264834, "res": {"No": 0.9915457102172327, "Yes": 0.00845354837264834}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.2609667024427823e-07}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.1739305595056446e-08}, "ground_truth": 1}, {"key": "38755897", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "YES": 7.789897103158997e-07}, "ground_truth": 0}, {"key": "38755897", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999951574563252, "res": {"Yes": 0.9999951574563252, "No": 4.419398310443405e-06}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.04220640467104277, "res": {"No": 0.9577923003472695, "Yes": 0.04220640467104277}, "ground_truth": 0}, {"key": "35507201", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 5.980885045697709e-06, "res": {"No": 0.9999931310055916, "Yes": 5.980885045697709e-06}, "ground_truth": 1}, {"key": "35507201", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 6.997560938349899e-07}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.2648488639101244e-08}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.430319679870166e-08}, "ground_truth": 1}, {"key": "36453511", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.313059657236888e-08}, "ground_truth": 0}, {"key": "36453511", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.925281796995204e-08}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00012579537236831725, "res": {"No": 0.9998737042159841, "Yes": 0.00012579537236831725}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.020103004449256633, "res": {"No": 0.9798964256648419, "Yes": 0.020103004449256633}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.246184166596453e-08}, "ground_truth": 1}, {"key": "38066835", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999847868417213, "res": {"Yes": 0.9999847868417213, "No": 1.455370438272742e-05}, "ground_truth": 0}, {"key": "38066835", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9991341721754097, "res": {"Yes": 0.9991341721754097, "No": 0.0008652249733940357}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.998843065265623, "res": {"Yes": 0.998843065265623, "No": 0.0011563272043928023}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1079435628350094e-07}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.2283682356742554e-07}, "ground_truth": 1}, {"key": "39697181", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.666765292631815e-08}, "ground_truth": 0}, {"key": "39697181", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.018714499148867823, "res": {"No": 0.9812847858167384, "Yes": 0.018714499148867823}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.90950268712283e-07, "res": {"No": 0.9999989719621284, "Yes": 4.90950268712283e-07}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0017102255783570629, "res": {"No": 0.9982885648781727, "Yes": 0.0017102255783570629}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.5421069812210067e-07}, "ground_truth": 1}, {"key": "21820893", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999319818133418, "res": {"Yes": 0.9999319818133418, "No": 6.732768761826818e-05}, "ground_truth": 0}, {"key": "21820893", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "yes": 1.0102982234018673e-06}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0168337685550328e-07, "res": {"No": 0.9999995679800934, "Yes": 1.0168337685550328e-07}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999249493064443, "res": {"Yes": 0.9999249493064443, "No": 7.420878637826526e-05}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999714363229496, "res": {"Yes": 0.9999714363229496, "No": 2.7986424630062848e-05}, "ground_truth": 1}, {"key": "40519933", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.06774292857137967, "res": {"No": 0.9322558917670274, "Yes": 0.06774292857137967}, "ground_truth": 0}, {"key": "40519933", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9910648211031547, "res": {"Yes": 0.9910648211031547, "No": 0.008933625280629102}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9836085274949671, "res": {"Yes": 0.9836085274949671, "No": 0.016390779956808123}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.990673613899102e-07}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.3639329683320917e-08}, "ground_truth": 1}, {"key": "30446033", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.1056199061039836e-08}, "ground_truth": 0}, {"key": "30446033", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.014168043616277874, "res": {"No": 0.9858316966519294, "Yes": 0.014168043616277874}, "ground_truth": 0}, {"key": "40216291", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, " Yes": 2.2665858103160876e-07}, "ground_truth": 1}, {"key": "40216291", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 7.99657611921724e-05, "res": {"No": 0.9999194663630074, "Yes": 7.99657611921724e-05}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.21063668535166571, "res": {"No": 0.7893623036978127, "Yes": 0.21063668535166571}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.1768586692345378e-08}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.4322730687679126e-08}, "ground_truth": 1}, {"key": "33479118", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.7195136024185785e-08}, "ground_truth": 0}, {"key": "33479118", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.394830196333671e-08}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 4.8264657206955615e-05, "res": {"No": 0.9999511724841019, "Yes": 4.8264657206955615e-05}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 7.31774883119686e-08}, "ground_truth": 1}, {"key": "22297373", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.4406273104120388e-07}, "ground_truth": 0}, {"key": "22297373", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 7.284724037167113e-06, "res": {"No": 0.9999920581810364, "Yes": 7.284724037167113e-06}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "No": 1.4240964721113867e-06}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999851444463448, "res": {"Yes": 0.9999851444463448, "No": 1.4516726814187973e-05}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.963453377590573e-08}, "ground_truth": 1}, {"key": "36463668", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.300004402445533e-07}, "ground_truth": 0}, {"key": "36463668", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1181266499930935e-07}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.3080970728132068e-05, "res": {"No": 0.9999863364673469, "Yes": 1.3080970728132068e-05}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.303513695648507e-07}, "ground_truth": 1}, {"key": "35264615", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9976709496564137, "res": {"Yes": 0.9976709496564137, "No": 0.002327903969915614}, "ground_truth": 0}, {"key": "35264615", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.005714268422446244, "res": {"No": 0.994285044842302, "Yes": 0.005714268422446244}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.762198390882742e-05, "res": {"No": 0.9999620195462757, "Yes": 3.762198390882742e-05}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.988406314776989, "res": {"Yes": 0.988406314776989, "No": 0.011592936369604488}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9973312372957991, "res": {"Yes": 0.9973312372957991, "No": 0.002668239733047888}, "ground_truth": 1}, {"key": "39898482", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.992287459750994, "res": {"Yes": 0.992287459750994, "No": 0.007711708659052279}, "ground_truth": 0}, {"key": "39898482", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.10531003133916872, "res": {"No": 0.8946896729209572, "Yes": 0.10531003133916872}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.0419610619140096e-07}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.917145656430092e-08}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999992773397112, "res": {"Yes": 0.999992773397112, "No": 6.978601635598142e-06}, "ground_truth": 1}, {"key": "37228721", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.635583947733992e-08}, "ground_truth": 0}, {"key": "37228721", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.8102729788647235e-07}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.017092269392559496, "res": {"No": 0.9829063135895274, "Yes": 0.017092269392559496}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9890593876324589, "res": {"Yes": 0.9890593876324589, "No": 0.010939735058160214}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.247789642222036e-08}, "ground_truth": 1}, {"key": "24535799", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.1809499428914696e-07}, "ground_truth": 0}, {"key": "24535799", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.197779769460718e-07}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9859991326945701, "res": {"Yes": 0.9859991326945701, "No": 0.014000343695355032}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.401754152914115e-08}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.881370094003418e-08}, "ground_truth": 1}, {"key": "35177759", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.154498547174271e-08}, "ground_truth": 0}, {"key": "35177759", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.97546665315018e-08}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.9770844972013577e-05, "res": {"No": 0.9999669067235946, "Yes": 2.9770844972013577e-05}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0016962271801887752, "res": {"No": 0.9983018599741527, "Yes": 0.0016962271801887752}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 4.559393261406089e-06}, "ground_truth": 1}, {"key": "34364829", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1753231733854282e-07}, "ground_truth": 0}, {"key": "34364829", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999958726752174, "res": {"Yes": 0.9999958726752174, "No": 3.603056661938077e-06}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.1706569183152134e-08}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.142742055066689e-08}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.64282141738341e-08}, "ground_truth": 1}, {"key": "38090732", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.720422982852994e-08}, "ground_truth": 0}, {"key": "38090732", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 5.737448796320858e-07}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 9.274191779037783e-05, "res": {"No": 0.999906712677533, "Yes": 9.274191779037783e-05}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 7.997289333563668e-07}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.5311499637031445e-08}, "ground_truth": 1}, {"key": "30651479", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.496703442066443e-07}, "ground_truth": 0}, {"key": "30651479", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9302236759304299, "res": {"Yes": 0.9302236759304299, "No": 0.06977315793359741}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999640459343629, "res": {"Yes": 0.9999640459343629, "No": 3.534569341707706e-05}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996965194595382, "res": {"Yes": 0.9996965194595382, "No": 0.0003026701376773129}, "ground_truth": 1}, {"key": "39380921", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999568939990904, "res": {"Yes": 0.9999568939990904, "No": 4.1766081299673495e-05}, "ground_truth": 0}, {"key": "39380921", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9171912481178764, "res": {"Yes": 0.9171912481178764, "No": 0.08280819078162795}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.089686279125548e-06, "res": {"No": 0.9999962302846054, "Yes": 3.089686279125548e-06}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 5.892778145151584e-06, "res": {"No": 0.9999936078174301, "Yes": 5.892778145151584e-06}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.4008689731138273e-08}, "ground_truth": 1}, {"key": "39037490", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.4198749539168986e-08}, "ground_truth": 0}, {"key": "39037490", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.897599083514462e-08}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 3.421864749793892e-07}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 7.929275352532037e-07}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0303552554294482e-07}, "ground_truth": 1}, {"key": "35917499", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.209565918113807e-08}, "ground_truth": 0}, {"key": "35917499", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 5.66520203724124e-06}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.237114841625467e-08}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.821713891018983e-08}, "ground_truth": 1}, {"key": "34908073", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.682451822654739e-08}, "ground_truth": 0}, {"key": "34908073", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.5670683142121966e-06}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.7676320670435727, "res": {"Yes": 0.7676320670435727, "No": 0.23236710703860652}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.1773198881297843, "res": {"No": 0.8226790500526749, "Yes": 0.1773198881297843}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.533624563469924e-08}, "ground_truth": 1}, {"key": "36344759", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.047134941395052e-08}, "ground_truth": 0}, {"key": "36344759", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 4.001801564936468e-07}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.879186404062009, "res": {"Yes": 0.879186404062009, "No": 0.12081256306578299}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9610183648968588, "res": {"Yes": 0.9610183648968588, "No": 0.03898000720750878}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.991824215321104e-08}, "ground_truth": 1}, {"key": "39984637", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 6.874566939554024e-07}, "ground_truth": 0}, {"key": "39984637", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "yes": 4.2448974046626217e-07}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9995875274901848, "res": {"Yes": 0.9995875274901848, "No": 0.00041091531949014986}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2493527921641886e-07}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.389894136219204e-08}, "ground_truth": 1}, {"key": "17917326", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.990998685960378e-08}, "ground_truth": 0}, {"key": "17917326", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.2077075091506338e-07}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.012588280712760247, "res": {"No": 0.9874109909659626, "Yes": 0.012588280712760247}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 9.342077435022976e-07, "res": {"No": 0.99999861435166, "Yes": 9.342077435022976e-07}, "ground_truth": 0}, {"key": "32193638", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.020084687650872e-07}, "ground_truth": 1}, {"key": "32193638", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999877668918251, "res": {"Yes": 0.9999877668918251, "No": 1.1309895584709914e-05}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.7779827556684267, "res": {"Yes": 0.7779827556684267, "No": 0.22201578411948436}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.13365956118146913, "res": {"No": 0.8663390277686341, "Yes": 0.13365956118146913}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1505874648695412e-07}, "ground_truth": 1}, {"key": "34564692", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.3219725775347038, "res": {"No": 0.6780268025760641, "Yes": 0.3219725775347038}, "ground_truth": 0}, {"key": "34564692", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 5.007453778016069e-06, "res": {"No": 0.9999928926002577, "Yes": 5.007453778016069e-06}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.6013364437896763e-06, "res": {"No": 0.999996945503965, "Yes": 1.6013364437896763e-06}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.6687964048979345e-07}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.123332699231015e-08}, "ground_truth": 1}, {"key": "39329284", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.086402053395013e-08}, "ground_truth": 0}, {"key": "39329284", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.4728502440055426e-05, "res": {"No": 0.9999839524287637, "Yes": 1.4728502440055426e-05}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995714490542004, "res": {"Yes": 0.9995714490542004, "No": 0.0004277046374613659}, "ground_truth": 0}, {"key": "37438541", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.2838888690761662e-07}, "ground_truth": 1}, {"key": "37438541", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.780385922074567e-07}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.57776194157919e-06, "res": {"No": 0.9999961110815618, "Yes": 2.57776194157919e-06}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999984906043415, "res": {"Yes": 0.999984906043415, "No": 1.3900121178332243e-05}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.6868628063496156e-08}, "ground_truth": 1}, {"key": "34652757", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.053367591951426e-08}, "ground_truth": 0}, {"key": "34652757", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.5442581773881994e-07}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999115995815473, "res": {"Yes": 0.9999115995815473, "No": 8.714780262744931e-05}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.6648675374314612, "res": {"Yes": 0.6648675374314612, "No": 0.33513102641470244}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.441760220560626e-08}, "ground_truth": 1}, {"key": "31361004", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.5137312827077063e-07}, "ground_truth": 0}, {"key": "31361004", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0088722335933782, "res": {"No": 0.9911267690502579, "Yes": 0.0088722335933782}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.6931756536324264e-05, "res": {"No": 0.9999523644646081, "Yes": 4.6931756536324264e-05}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0002584376738629209, "res": {"No": 0.999741197265207, "Yes": 0.0002584376738629209}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.0001151300984492e-08}, "ground_truth": 1}, {"key": "26150727", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.6283457370233055e-07}, "ground_truth": 0}, {"key": "26150727", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.784499465282168e-08}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.4389598957138188e-06, "res": {"No": 0.9999977799274644, "Yes": 1.4389598957138188e-06}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.336309741411338e-07}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.811993467759431e-07}, "ground_truth": 1}, {"key": "36997402", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.050135626493055e-08}, "ground_truth": 0}, {"key": "36997402", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.7343464402918335e-07}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.471462356832766e-08}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.306357396749166e-07}, "ground_truth": 1}, {"key": "37430643", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.714665377719179e-08}, "ground_truth": 0}, {"key": "37430643", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.409605624189338e-08}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 7.272626205027485e-05, "res": {"No": 0.9999266180325883, "Yes": 7.272626205027485e-05}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.975977953769475e-08}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.56946357920512e-08}, "ground_truth": 1}, {"key": "36964631", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.796280217240283e-08}, "ground_truth": 0}, {"key": "36964631", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997547830705872, "res": {"Yes": 0.9997547830705872, "No": 0.0002448870436648436}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.7140090797405144, "res": {"Yes": 0.7140090797405144, "No": 0.2859896950800671}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.366036284485998e-08}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.126739063946486e-08}, "ground_truth": 1}, {"key": "35502013", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.071372818225357e-08}, "ground_truth": 0}, {"key": "35502013", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.6690157476786053, "res": {"Yes": 0.6690157476786053, "No": 0.33098345160696296}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8792967225008091, "res": {"Yes": 0.8792967225008091, "No": 0.12070244430580276}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9976672740438718, "res": {"Yes": 0.9976672740438718, "No": 0.0023312452782261036}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "yes": 4.052150188428589e-07}, "ground_truth": 1}, {"key": "33987664", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 5.953181270306764e-08}, "ground_truth": 0}, {"key": "33987664", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 7.030286542647597e-05, "res": {"No": 0.9999290019304823, "Yes": 7.030286542647597e-05}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.2903224133821075e-06, "res": {"No": 0.9999962302846054, "Yes": 3.2903224133821075e-06}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9752745284265867, "res": {"Yes": 0.9752745284265867, "No": 0.024724911026588945}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.059178916427368e-08}, "ground_truth": 1}, {"key": "35203721", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999928926002577, "res": {"Yes": 0.9999928926002577, "No": 6.777962617937962e-06}, "ground_truth": 0}, {"key": "35203721", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.91536668311802e-08}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.6956388253911192e-06, "res": {"No": 0.9999978991308068, "Yes": 1.6956388253911192e-06}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.013842149917457488, "res": {"No": 0.9861575777365019, "Yes": 0.013842149917457488}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.431076575192687e-08}, "ground_truth": 1}, {"key": "39028348", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1271255727153365e-07}, "ground_truth": 0}, {"key": "39028348", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.0874966187149133e-06}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.2131418419593511e-06, "res": {"No": 0.9999984951481323, "Yes": 1.2131418419593511e-06}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.804508480993423e-08}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.188379340448862e-08}, "ground_truth": 1}, {"key": "37459383", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.012783823382216e-08}, "ground_truth": 0}, {"key": "37459383", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.2218604436048005e-07}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 4.5545723977942497e-07}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.15227683124989e-08}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.301761688877697e-08}, "ground_truth": 1}, {"key": "34020070", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.768721216127296e-08}, "ground_truth": 0}, {"key": "34020070", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.665956001475138e-08}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00020231189837972434, "res": {"No": 0.999797314573467, "Yes": 0.00020231189837972434}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.218614446626058e-08}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.193338486046312e-08}, "ground_truth": 1}, {"key": "35176615", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999919389784903, "res": {"Yes": 0.9999919389784903, "No": 7.5199805984440165e-06}, "ground_truth": 0}, {"key": "35176615", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993021051419807, "res": {"Yes": 0.9993021051419807, "No": 0.0006967945178959889}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0022191225699203444, "res": {"No": 0.9977806019140587, "Yes": 0.0022191225699203444}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999870516788303, "res": {"Yes": 0.9999870516788303, "No": 1.2612729200021385e-05}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.9372293643839025e-08}, "ground_truth": 1}, {"key": "33296389", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.756606344473814e-08}, "ground_truth": 0}, {"key": "33296389", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.46288821052453394, "res": {"No": 0.5371113232995445, "Yes": 0.46288821052453394}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999946806438478, "res": {"Yes": 0.9999946806438478, "No": 4.2279428799210145e-06}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.211404591843858e-08}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1401748763608563e-07}, "ground_truth": 1}, {"key": "35399504", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.3592449961281034e-07}, "ground_truth": 0}, {"key": "35399504", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.05948246764709e-08}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9540163493473868, "res": {"Yes": 0.9540163493473868, "No": 0.04598331802028588}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.032342849690138e-07}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.575132883833647e-08}, "ground_truth": 1}, {"key": "34807886", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.3257830048624765e-08}, "ground_truth": 0}, {"key": "34807886", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.824189512282989e-08}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999934886141991, "res": {"Yes": 0.9999934886141991, "No": 6.32913488616828e-06}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.2744342051292516e-08}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.985370601359921e-08}, "ground_truth": 1}, {"key": "37629813", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.164442754412627e-08}, "ground_truth": 0}, {"key": "37629813", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.7011842921460542e-07}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 5.968566276975849e-08}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.4915393789994924e-08}, "ground_truth": 1}, {"key": "28084389", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.736434263792819e-08}, "ground_truth": 0}, {"key": "28084389", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.6988405618743725e-08}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.1771514151251496e-05, "res": {"No": 0.999988005296937, "Yes": 1.1771514151251496e-05}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.703087121379564e-06, "res": {"No": 0.9999980183344636, "Yes": 1.703087121379564e-06}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.4020512575628266e-08}, "ground_truth": 1}, {"key": "35391734", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.9184269166863504e-08}, "ground_truth": 0}, {"key": "35391734", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.757944484320546e-08}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.04069844067178884, "res": {"No": 0.9593010659346872, "Yes": 0.04069844067178884}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.025317872746165388, "res": {"No": 0.9746816442705482, "Yes": 0.025317872746165388}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.896149446304638e-08}, "ground_truth": 1}, {"key": "40214591", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.0115441298786312e-07}, "ground_truth": 0}, {"key": "40214591", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.2318432821320698e-07}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.098561594395698e-05, "res": {"No": 0.9999185128062618, "Yes": 8.098561594395698e-05}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999647611309035, "res": {"Yes": 0.9999647611309035, "No": 3.4666110184154874e-05}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.9009533330224775e-08}, "ground_truth": 1}, {"key": "26283171", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 6.839480264001489e-08}, "ground_truth": 0}, {"key": "26283171", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.215568235447212e-07}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 9.056553605649938e-07}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.051126756921342914, "res": {"No": 0.9488729823572388, "Yes": 0.051126756921342914}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.000889497694401e-08}, "ground_truth": 1}, {"key": "37084030", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.021748338050463e-07}, "ground_truth": 0}, {"key": "37084030", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9956117011451903, "res": {"Yes": 0.9956117011451903, "No": 0.004387931269403723}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.928139670979541, "res": {"Yes": 0.928139670979541, "No": 0.07185960457683344}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 7.361227148372208e-07}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0196053604983805e-07}, "ground_truth": 1}, {"key": "39027295", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999545100307, "res": {"Yes": 0.9999545100307, "No": 4.5124690502730586e-05}, "ground_truth": 0}, {"key": "39027295", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999950382530095, "res": {"Yes": 0.9999950382530095, "No": 4.535042966594729e-06}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996357605513003, "res": {"Yes": 0.9996357605513003, "No": 0.0003639808505829487}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.796454512144163e-08}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.364138236172817e-08}, "ground_truth": 1}, {"key": "14018647", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.2812294835768673e-07}, "ground_truth": 0}, {"key": "14018647", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.543384676945895e-08}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.6864852860916407e-07, "res": {"No": 0.9999995679800934, "Yes": 1.6864852860916407e-07}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.762385866416693e-08}, "ground_truth": 1}, {"key": "37424289", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.866096097054848, "res": {"Yes": 0.866096097054848, "No": 0.13390270832695245}, "ground_truth": 0}, {"key": "37424289", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.924377357887329e-05, "res": {"No": 0.9999801379802525, "Yes": 1.924377357887329e-05}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.1212111275859521e-05, "res": {"No": 0.9999883629027115, "Yes": 1.1212111275859521e-05}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999781115595554, "res": {"Yes": 0.9999781115595554, "No": 2.1337787414725968e-05}, "ground_truth": 0}, {"key": "37498031", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0447103301460012e-07}, "ground_truth": 1}, {"key": "37498031", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.3449695209530256e-08}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.25864062987403e-06, "res": {"No": 0.9999940846288958, "Yes": 5.25864062987403e-06}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9995503629670425, "res": {"Yes": 0.9995503629670425, "No": 0.0004490957685416131}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0363590435793725e-07}, "ground_truth": 1}, {"key": "30104095", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.343574685902525e-08}, "ground_truth": 0}, {"key": "30104095", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1883949925112721e-07}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9907519378213056, "res": {"Yes": 0.9907519378213056, "No": 0.009246446161849622}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.229484868164813e-06}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.993028940496529e-08}, "ground_truth": 1}, {"key": "37911407", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9994703413187088, "res": {"Yes": 0.9994703413187088, "No": 0.000528607324223495}, "ground_truth": 0}, {"key": "37911407", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.6402302871272588e-07}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00016046800196083396, "res": {"No": 0.9998386670555593, "Yes": 0.00016046800196083396}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.4034838003135065, "res": {"No": 0.5965151736284335, "Yes": 0.4034838003135065}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.46190500302737e-08}, "ground_truth": 1}, {"key": "39177472", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.381505229784505e-07}, "ground_truth": 0}, {"key": "39177472", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0009595477291316742, "res": {"No": 0.9990398069306679, "Yes": 0.0009595477291316742}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9978370940593431, "res": {"Yes": 0.9978370940593431, "No": 0.0021622896404118372}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9129468543443471, "res": {"Yes": 0.9129468543443471, "No": 0.08705216868346294}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.2180078947003946e-07}, "ground_truth": 1}, {"key": "32325454", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.6186450165884177e-07}, "ground_truth": 0}, {"key": "32325454", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 4.1247097387959503e-07}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00023183306330999397, "res": {"No": 0.9997678813437048, "Yes": 0.00023183306330999397}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999410407211666, "res": {"Yes": 0.9999410407211666, "No": 5.797874964409732e-05}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 2.0727759425731995e-08}, "ground_truth": 1}, {"key": "38395319", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.618189096039926e-07}, "ground_truth": 0}, {"key": "38395319", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.43109451581301e-08}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 4.03275416417348e-06}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 4.5459087423661203e-07}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 6.799783299491029e-08}, "ground_truth": 1}, {"key": "38235895", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.4622258613412452, "res": {"No": 0.5377727531324922, "Yes": 0.4622258613412452}, "ground_truth": 0}, {"key": "38235895", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 9.513193750713571e-07}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9951770337332762, "res": {"Yes": 0.9951770337332762, "No": 0.004822243513402635}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.194845545046065e-08}, "ground_truth": 1}, {"key": "26543267", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.1137311816669357e-08}, "ground_truth": 0}, {"key": "26543267", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "No": 4.022657086012006e-07}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0015735752803377606, "res": {"No": 0.9984259110795708, "Yes": 0.0015735752803377606}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1477708281914517e-07}, "ground_truth": 1}, {"key": "39054728", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.8611780565254532e-07}, "ground_truth": 0}, {"key": "39054728", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.3267364053471595e-08}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.3990455532749963, "res": {"No": 0.600954051969118, "Yes": 0.3990455532749963}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.1780579931748377e-07}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.283205783045253e-08}, "ground_truth": 1}, {"key": "39158443", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.0002833792358873e-08}, "ground_truth": 0}, {"key": "39158443", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999996945503965, "res": {"Yes": 0.999996945503965, "No": 2.4005636356389405e-06}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.642042973895796, "res": {"Yes": 0.642042973895796, "No": 0.3579565029868128}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.054980887825075e-07}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.4420821943646293e-08}, "ground_truth": 1}, {"key": "36254201", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.438346445026872e-08}, "ground_truth": 0}, {"key": "36254201", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.258964424006445e-08}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999979303571174, "res": {"Yes": 0.999979303571174, "No": 2.0267346779992777e-05}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.3449927880009117e-07}, "ground_truth": 1}, {"key": "23434347", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.518929981703798e-08}, "ground_truth": 0}, {"key": "23434347", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 3.4022380496584815e-05, "res": {"No": 0.9999655955278475, "Yes": 3.4022380496584815e-05}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 5.6792294195886395e-06, "res": {"No": 0.999992773397112, "Yes": 5.6792294195886395e-06}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 7.790886482915375e-07}, "ground_truth": 0}, {"key": "34397620", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.9830650462839683e-07}, "ground_truth": 1}, {"key": "34397620", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.020784584434632956, "res": {"No": 0.9792144454843659, "Yes": 0.020784584434632956}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9271746710561252, "res": {"Yes": 0.9271746710561252, "No": 0.07282445207671197}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.367988825890526e-07}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.414452176183248e-08}, "ground_truth": 1}, {"key": "34340916", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.342139303438117e-08}, "ground_truth": 0}, {"key": "34340916", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.368816944449606e-07}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.2821606349964846e-07}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.958883641294514e-08}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.535120149027876e-07}, "ground_truth": 1}, {"key": "30375089", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.510508924850934e-08}, "ground_truth": 0}, {"key": "30375089", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.213015794571471e-07}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0004238142450601281, "res": {"No": 0.9995754929286346, "Yes": 0.0004238142450601281}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.2798522790073157e-07}, "ground_truth": 1}, {"key": "35807797", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9872527177207229, "res": {"Yes": 0.9872527177207229, "No": 0.01274458886763984}, "ground_truth": 0}, {"key": "35807797", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 6.233926960546647e-05, "res": {"No": 0.9999368688428554, "Yes": 6.233926960546647e-05}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9976395115447896, "res": {"Yes": 0.9976395115447896, "No": 0.002357913172650923}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.1671333929852098e-07}, "ground_truth": 1}, {"key": "34188172", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 2.3182176844232606e-07}, "ground_truth": 0}, {"key": "34188172", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2100995766301917e-07}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.263947362111393e-07}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9551563735198952, "res": {"Yes": 0.9551563735198952, "No": 0.04484296764429291}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999250684975053, "res": {"Yes": 0.9999250684975053, "No": 7.456961047780447e-05}, "ground_truth": 1}, {"key": "37075567", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.250945328261152e-08}, "ground_truth": 0}, {"key": "37075567", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00030411032840913085, "res": {"No": 0.9996950894546717, "Yes": 0.00030411032840913085}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.8581099735447517e-06, "res": {"No": 0.9999963494876631, "Yes": 2.8581099735447517e-06}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.518818204347158e-08}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.628301904606518e-08}, "ground_truth": 1}, {"key": "35559735", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.3824604208238973e-08}, "ground_truth": 0}, {"key": "35559735", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.620980132238958e-08}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0010889563701231213, "res": {"No": 0.9989107390758315, "Yes": 0.0010889563701231213}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 3.397309011220123e-07}, "ground_truth": 1}, {"key": "33005019", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.00023215112442086358, "res": {"No": 0.9997671662999741, "Yes": 0.00023215112442086358}, "ground_truth": 0}, {"key": "33005019", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.273586750856554e-07}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.1560170947238824e-06, "res": {"No": 0.9999984951481323, "Yes": 1.1560170947238824e-06}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.5226963091224562e-07}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.8288675951215014e-08}, "ground_truth": 1}, {"key": "30808252", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999967070975216, "res": {"Yes": 0.9999967070975216, "No": 3.0664285878126917e-06}, "ground_truth": 0}, {"key": "30808252", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6664200635662764e-07}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.58935620455446e-07, "res": {"No": 0.9999992103693117, "Yes": 6.58935620455446e-07}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9980501966410655, "res": {"Yes": 0.9980501966410655, "No": 0.0019488067827102228}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.399037753806856e-08}, "ground_truth": 1}, {"key": "15159017", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "yes": 6.860440489394402e-07}, "ground_truth": 0}, {"key": "15159017", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.984431516528484, "res": {"Yes": 0.984431516528484, "No": 0.015566755443236793}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9650271532076526, "res": {"Yes": 0.9650271532076526, "No": 0.03497202230037477}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999936078174301, "res": {"Yes": 0.9999936078174301, "No": 6.034280088050569e-06}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999063550949578, "res": {"Yes": 0.9999063550949578, "No": 9.312796899008868e-05}, "ground_truth": 1}, {"key": "24493400", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999557020111849, "res": {"Yes": 0.9999557020111849, "No": 4.377498598291274e-05}, "ground_truth": 0}, {"key": "24493400", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999914621674475, "res": {"Yes": 0.9999914621674475, "No": 8.174647184267935e-06}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.359321669949351e-05, "res": {"No": 0.9999559404106522, "Yes": 4.359321669949351e-05}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997744360533731, "res": {"Yes": 0.9997744360533731, "No": 0.0002247596385233399}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.226197886062368e-08}, "ground_truth": 1}, {"key": "37791071", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.7288756129105555e-08}, "ground_truth": 0}, {"key": "37791071", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.6060671413565516e-08}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9805048361769211, "res": {"Yes": 0.9805048361769211, "No": 0.019494219368524697}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9509931266121852, "res": {"Yes": 0.9509931266121852, "No": 0.0490063449734715}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.140475278225835e-07}, "ground_truth": 1}, {"key": "33528627", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.183511129905428e-07}, "ground_truth": 0}, {"key": "33528627", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.3447893254389017e-06}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999455702013552, "res": {"Yes": 0.9999455702013552, "No": 5.362243575221119e-05}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.4166082596985659e-05, "res": {"No": 0.9999855020530962, "Yes": 1.4166082596985659e-05}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.918499758631013e-08}, "ground_truth": 1}, {"key": "39925662", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.887606195798929e-08}, "ground_truth": 0}, {"key": "39925662", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999176784526046, "res": {"Yes": 0.9999176784526046, "No": 8.112097491586644e-05}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4915705883779218e-07}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0881202333896563e-07}, "ground_truth": 1}, {"key": "29213416", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.302840130862448e-08}, "ground_truth": 0}, {"key": "29213416", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.342715341966825e-08}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.155590811818544e-08}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.0495701672883855e-08}, "ground_truth": 1}, {"key": "34492745", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.526655180221602e-08}, "ground_truth": 0}, {"key": "34492745", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.621675090376287e-07}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.623012184307955e-05, "res": {"No": 0.9999127915188807, "Yes": 8.623012184307955e-05}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.2167621737158205e-07}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4162260905003348e-07}, "ground_truth": 1}, {"key": "34191937", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0009761270605401e-07}, "ground_truth": 0}, {"key": "34191937", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 7.937963402661627e-08}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.005061081921805764, "res": {"No": 0.9949373063521318, "Yes": 0.005061081921805764}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "yes": 4.7646002992704275e-07}, "ground_truth": 1}, {"key": "34933372", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.513018483151025e-07}, "ground_truth": 0}, {"key": "34933372", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.09742916284069499, "res": {"No": 0.9025694489414999, "Yes": 0.09742916284069499}, "ground_truth": 0}, {"key": "38714379", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 2.388473736476405e-06, "res": {"No": 0.9999967070975216, "Yes": 2.388473736476405e-06}, "ground_truth": 1}, {"key": "38714379", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00013576949498434162, "res": {"No": 0.9998627389311626, "Yes": 0.00013576949498434162}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.7602944397546432, "res": {"Yes": 0.7602944397546432, "No": 0.2397040599631519}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99995629800496, "res": {"Yes": 0.99995629800496, "No": 4.300543736138955e-05}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.434259985271816e-08}, "ground_truth": 1}, {"key": "39220660", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9988228461897508, "res": {"Yes": 0.9988228461897508, "No": 0.001176508647067779}, "ground_truth": 0}, {"key": "39220660", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999791843696483, "res": {"Yes": 0.9999791843696483, "No": 2.00519530495606e-05}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.7719337412786509, "res": {"Yes": 0.7719337412786509, "No": 0.2280648410246514}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.4662053240032203e-07}, "ground_truth": 1}, {"key": "41028780", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 2.1752368560921896e-06}, "ground_truth": 0}, {"key": "41028780", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.3288775665008736e-07}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.2746997376050482e-07}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.6400839477535366e-08}, "ground_truth": 1}, {"key": "39457108", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.7899155781521964e-08}, "ground_truth": 0}, {"key": "39457108", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.916963564174046e-08}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.002698118232178873, "res": {"No": 0.9973016040329941, "Yes": 0.002698118232178873}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.3606629547923719, "res": {"No": 0.6393363651914434, "Yes": 0.3606629547923719}, "ground_truth": 0}, {"key": "38288018", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.912524210961747e-08}, "ground_truth": 1}, {"key": "38288018", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.520185039325977e-08}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0026988379162770043, "res": {"No": 0.9973007754751656, "Yes": 0.0026988379162770043}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999943901441583, "res": {"Yes": 0.999943901441583, "No": 5.560824924485048e-05}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.786441818529234e-08}, "ground_truth": 1}, {"key": "40106293", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.132703984605851e-07}, "ground_truth": 0}, {"key": "40106293", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9998851390570233, "res": {"Yes": 0.9998851390570233, "No": 0.00011407835740793429}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.5498706331595016e-05, "res": {"No": 0.9999838332276837, "Yes": 1.5498706331595016e-05}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.6062988006994982e-07}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.809296044403087e-08}, "ground_truth": 1}, {"key": "39948797", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.78194889637587e-07}, "ground_truth": 0}, {"key": "39948797", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0002409613525972739, "res": {"No": 0.9997585856551338, "Yes": 0.0002409613525972739}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.997091664126677, "res": {"Yes": 0.997091664126677, "No": 0.00290779388626082}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.496896960106649e-08}, "ground_truth": 1}, {"key": "31853399", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.928513251651412e-08}, "ground_truth": 0}, {"key": "31853399", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.2718489545523026e-08}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0003098194033758808, "res": {"No": 0.999689611440499, "Yes": 0.0003098194033758808}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.264597760765305e-08}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.7893643656702494e-08}, "ground_truth": 1}, {"key": "35273252", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.191945941910208e-08}, "ground_truth": 0}, {"key": "35273252", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.8413609665426564e-07}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0011425016397125086, "res": {"No": 0.9988571004077917, "Yes": 0.0011425016397125086}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.012818121078119351, "res": {"No": 0.9871817259431143, "Yes": 0.012818121078119351}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.2026309420713137e-08}, "ground_truth": 1}, {"key": "37130459", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.523191460285596e-08}, "ground_truth": 0}, {"key": "37130459", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.987989472343745e-08}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9837960821432403, "res": {"Yes": 0.9837960821432403, "No": 0.01620315990338815}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.5357220340880588e-07}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1420554337107389e-07}, "ground_truth": 1}, {"key": "21734003", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 6.641598974774327e-08}, "ground_truth": 0}, {"key": "21734003", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.263655599801567e-08}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 7.140691054155388e-06, "res": {"No": 0.9999924157887603, "Yes": 7.140691054155388e-06}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 2.97456425084115e-06}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.04370378783799e-08}, "ground_truth": 1}, {"key": "33990737", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.8876584600437856e-07}, "ground_truth": 0}, {"key": "33990737", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9935756098069056, "res": {"Yes": 0.9935756098069056, "No": 0.006423890165556193}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 5.299277857454474e-06}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.954466675470171e-08}, "ground_truth": 1}, {"key": "34559912", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.6111444488726715e-08}, "ground_truth": 0}, {"key": "34559912", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.6439233162976965e-07}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.999932458601023, "res": {"Yes": 0.999932458601023, "No": 6.708775630568892e-05}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.3836183563354926e-07}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2521444619408018e-07}, "ground_truth": 1}, {"key": "39820439", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 9.825300513981698e-08}, "ground_truth": 0}, {"key": "39820439", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.597836698520103e-07}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.1632739708103662e-06, "res": {"No": 0.9999955150656573, "Yes": 3.1632739708103662e-06}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999930118027176, "res": {"Yes": 0.9999930118027176, "No": 6.416575460164167e-06}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.3953938185035323e-07}, "ground_truth": 1}, {"key": "34759328", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.3781946536625314e-07}, "ground_truth": 0}, {"key": "34759328", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00025879999879335216, "res": {"No": 0.9997407205887178, "Yes": 0.00025879999879335216}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8380763943049875, "res": {"Yes": 0.8380763943049875, "No": 0.16192218478236073}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.82933433018306e-05, "res": {"No": 0.9999704827216435, "Yes": 2.82933433018306e-05}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.019595164495756e-07}, "ground_truth": 1}, {"key": "36939137", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.103047228926387e-06}, "ground_truth": 0}, {"key": "36939137", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9975285886779959, "res": {"Yes": 0.9975285886779959, "No": 0.0024698076644942}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00015878793311170894, "res": {"No": 0.9998409315627257, "Yes": 0.00015878793311170894}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997994598108725, "res": {"Yes": 0.9997994598108725, "No": 0.00020001503884863658}, "ground_truth": 1}, {"key": "35851522", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.07553115944400574, "res": {"No": 0.9244677800307242, "Yes": 0.07553115944400574}, "ground_truth": 0}, {"key": "35851522", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.4789681533755368e-07}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9829777194122847, "res": {"Yes": 0.9829777194122847, "No": 0.01702193112789098}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.033596089309277e-08}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.717509250220354e-08}, "ground_truth": 1}, {"key": "22412782", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 8.36936350616709e-09}, "ground_truth": 0}, {"key": "22412782", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.6204884821214382e-08}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.016422081151708e-08}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.933298069254702e-08}, "ground_truth": 1}, {"key": "38579227", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.2323410473598588e-07}, "ground_truth": 0}, {"key": "38579227", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.449111354132321e-08}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993042493468081, "res": {"Yes": 0.9993042493468081, "No": 0.0006951948444250182}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999697353626656, "res": {"Yes": 0.999697353626656, "No": 0.0003023443374139682}, "ground_truth": 1}, {"key": "37206995", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9990252891395349, "res": {"Yes": 0.9990252891395349, "No": 0.0009741408060861927}, "ground_truth": 0}, {"key": "37206995", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.005538278314937581, "res": {"No": 0.9944613859934888, "Yes": 0.005538278314937581}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.744872115756297e-07}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.478348812409549e-08}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.418855579734829e-08}, "ground_truth": 1}, {"key": "38700847", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.280118704701159e-08}, "ground_truth": 0}, {"key": "38700847", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.57422193262948e-08}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.000690198303809e-08}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.068028413270683e-08}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.806089621138121e-08}, "ground_truth": 1}, {"key": "20246590", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.39435741607904e-08}, "ground_truth": 0}, {"key": "20246590", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.657501410722764e-08}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.120100719405028e-06, "res": {"No": 0.9999911045595646, "Yes": 8.120100719405028e-06}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9968498299307268, "res": {"Yes": 0.9968498299307268, "No": 0.0031470429911455303}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.203278182559439e-07}, "ground_truth": 1}, {"key": "39141360", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.016566610912433444, "res": {"No": 0.9834327979245455, "Yes": 0.016566610912433444}, "ground_truth": 0}, {"key": "39141360", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.3280154047619618e-07}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.6696026762841334e-07}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00011246213171077397, "res": {"No": 0.9998870461047716, "Yes": 0.00011246213171077397}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.81526736157697e-08}, "ground_truth": 1}, {"key": "37906226", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.783185484769319e-08}, "ground_truth": 0}, {"key": "37906226", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.0486331539089548e-06}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.1778320053147098e-05, "res": {"No": 0.9999877668918251, "Yes": 1.1778320053147098e-05}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9986706434782372, "res": {"Yes": 0.9986706434782372, "No": 0.0013290205986642495}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.280647611829317e-08}, "ground_truth": 1}, {"key": "16201033", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.615291971727275e-08}, "ground_truth": 0}, {"key": "16201033", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.6438461817444392e-08}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.357129702217727e-07}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.9809799588035907e-07}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.465595509438657e-07}, "ground_truth": 1}, {"key": "36469022", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.186715510064843e-07}, "ground_truth": 0}, {"key": "36469022", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.5688867939360953e-07}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0009682524589412057, "res": {"No": 0.9990314817168848, "Yes": 0.0009682524589412057}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.348105887973447e-08}, "ground_truth": 1}, {"key": "31295270", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.9360225779648715e-07}, "ground_truth": 0}, {"key": "31295270", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.04665533481129831, "res": {"No": 0.9533443633077929, "Yes": 0.04665533481129831}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.1819139819300325e-05, "res": {"No": 0.9999773963544663, "Yes": 2.1819139819300325e-05}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.107211798143502e-07}, "ground_truth": 1}, {"key": "35360689", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.6557426693078927e-07}, "ground_truth": 0}, {"key": "35360689", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.83993489172501, "res": {"Yes": 0.83993489172501, "No": 0.16006424709530656}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 7.571752586043616e-06, "res": {"No": 0.9999919389784903, "Yes": 7.571752586043616e-06}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.144446872779752e-07}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.297764593180336e-08}, "ground_truth": 1}, {"key": "29202793", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.387933013225471e-08}, "ground_truth": 0}, {"key": "29202793", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.91416885133383e-07}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.934052921604035e-08}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.2451619371605853e-07}, "ground_truth": 1}, {"key": "35999008", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.026018369883643e-07}, "ground_truth": 0}, {"key": "35999008", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.238799299737363e-08}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.05215725213069e-06, "res": {"No": 0.9999965878943212, "Yes": 2.05215725213069e-06}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.4050466874901276e-07}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.695596864664268e-07}, "ground_truth": 1}, {"key": "31797119", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0344332073412342e-07}, "ground_truth": 0}, {"key": "31797119", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 4.298776975600362e-06}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998739425959023, "res": {"Yes": 0.9998739425959023, "No": 0.0001253759167055039}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999468813708443, "res": {"Yes": 0.9999468813708443, "No": 5.258997740539613e-05}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.8828237618644239e-07}, "ground_truth": 1}, {"key": "26711893", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.8299800182985235e-07}, "ground_truth": 0}, {"key": "26711893", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.819935932795429e-07}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9881194309613756, "res": {"Yes": 0.9881194309613756, "No": 0.011879172765568494}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.390375709296576e-08}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.407608562511561e-08}, "ground_truth": 1}, {"key": "35348288", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.6375114955708476e-08}, "ground_truth": 0}, {"key": "35348288", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.6095342375522895e-08}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9589752486240688, "res": {"Yes": 0.9589752486240688, "No": 0.04102405815910687}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.6522279518455135e-07}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.5952480822216504e-08}, "ground_truth": 1}, {"key": "38124131", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.8518532431474655e-08}, "ground_truth": 0}, {"key": "38124131", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.475509283864046e-08}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9377775233852906, "res": {"Yes": 0.9377775233852906, "No": 0.06222160451972504}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.2594053313941418e-07}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.627447958752082e-08}, "ground_truth": 1}, {"key": "20285901", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.2149289883795645e-08}, "ground_truth": 0}, {"key": "20285901", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967701177285491, "res": {"Yes": 0.9967701177285491, "No": 0.003229412651253384}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.933932851093462e-07, "res": {"No": 0.9999994487765019, "Yes": 3.933932851093462e-07}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.4636878454623361, "res": {"No": 0.5363117306145754, "Yes": 0.4636878454623361}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.3317458916578866e-08}, "ground_truth": 1}, {"key": "35633632", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 5.434233922216063e-06}, "ground_truth": 0}, {"key": "35633632", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.813791663183398e-08}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.734067378841453e-08}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.602523784024086e-07}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.432309899851844e-08}, "ground_truth": 1}, {"key": "10741274", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.4663050293551944e-07}, "ground_truth": 0}, {"key": "10741274", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.127291700992272e-08}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 4.739177312901703e-06, "res": {"No": 0.9999946806438478, "Yes": 4.739177312901703e-06}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 3.477231617445201e-05, "res": {"No": 0.9999636883392843, "Yes": 3.477231617445201e-05}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0207571450104e-07}, "ground_truth": 1}, {"key": "30605795", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.2404272812554477, "res": {"No": 0.7595719181405229, "Yes": 0.2404272812554477}, "ground_truth": 0}, {"key": "30605795", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 9.733607207472338e-06, "res": {"No": 0.9999890781166442, "Yes": 9.733607207472338e-06}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.5231699413205824e-07}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.608247402062749e-08}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.968851584806098e-08}, "ground_truth": 1}, {"key": "30539722", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.8250965451697245e-08}, "ground_truth": 0}, {"key": "30539722", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.4336342458441185e-07}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.028701324713986974, "res": {"No": 0.9712983289515201, "Yes": 0.028701324713986974}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.2047386445860294e-07}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.835602285059188e-08}, "ground_truth": 1}, {"key": "18639299", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.2716836953828134e-08}, "ground_truth": 0}, {"key": "18639299", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.371254417595527e-08}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.9036261322280524e-06, "res": {"No": 0.999996945503965, "Yes": 1.9036261322280524e-06}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997626376149095, "res": {"Yes": 0.9997626376149095, "No": 0.00023646342811062937}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999957534720165, "res": {"Yes": 0.9999957534720165, "No": 3.4454260126412933e-06}, "ground_truth": 1}, {"key": "39773552", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.39231550626469547, "res": {"No": 0.607683452457568, "Yes": 0.39231550626469547}, "ground_truth": 0}, {"key": "39773552", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999461661890916, "res": {"Yes": 0.9999461661890916, "No": 5.3204371177576204e-05}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.55091323402402e-05, "res": {"No": 0.9999338889494318, "Yes": 6.55091323402402e-05}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.4415971789160895e-07}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1293538454164368e-07}, "ground_truth": 1}, {"key": "34086410", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.0412528042035785e-08}, "ground_truth": 0}, {"key": "34086410", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.2179932786933547e-07}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.3690642982947962, "res": {"No": 0.6309350207201792, "Yes": 0.3690642982947962}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.6711160809686857, "res": {"Yes": 0.6711160809686857, "No": 0.3288825114663593}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.0686014355898643e-08}, "ground_truth": 1}, {"key": "35454652", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.845814513783619e-07}, "ground_truth": 0}, {"key": "35454652", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "yes": 9.540488131339746e-07}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9886023549560372, "res": {"Yes": 0.9886023549560372, "No": 0.011396846325125089}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "yes": 4.587685511138165e-07}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.451642016347585e-08}, "ground_truth": 1}, {"key": "36158310", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 5.100541670295967e-08}, "ground_truth": 0}, {"key": "36158310", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.1000092160145497e-06}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.01449111959957e-08}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.5701290512665913e-08}, "ground_truth": 1}, {"key": "35688387", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.0206974364683946e-08}, "ground_truth": 0}, {"key": "35688387", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.07138784230325179, "res": {"No": 0.928611759304275, "Yes": 0.07138784230325179}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9997638293726743, "res": {"Yes": 0.9997638293726743, "No": 0.00023559484187408598}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 4.372411746533187e-08}, "ground_truth": 1}, {"key": "34209292", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.7255265831197854e-07}, "ground_truth": 0}, {"key": "34209292", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.375861758076485e-08}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999924157887603, "res": {"Yes": 0.9999924157887603, "No": 6.647371796416132e-06}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 8.420260314202123e-08}, "ground_truth": 1}, {"key": "25037859", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.747781228451196e-08}, "ground_truth": 0}, {"key": "25037859", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999966549126493, "res": {"Yes": 0.999966549126493, "No": 3.3092777559824695e-05}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983155305139325, "res": {"Yes": 0.9983155305139325, "No": 0.0016830078491490504}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.6105127316740975e-07}, "ground_truth": 1}, {"key": "36412121", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.1844119820473246e-07}, "ground_truth": 0}, {"key": "36412121", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.011032657951453641, "res": {"No": 0.9889657558343378, "Yes": 0.011032657951453641}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.896118980088609e-06, "res": {"No": 0.999995276659155, "Yes": 2.896118980088609e-06}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.6413639252981928e-06}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.699485657647402e-07}, "ground_truth": 1}, {"key": "34909172", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999961110815618, "res": {"Yes": 0.9999961110815618, "No": 3.3083565581104736e-06}, "ground_truth": 0}, {"key": "34909172", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, " Yes": 2.3243900277399016e-07}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9996703102896007, "res": {"Yes": 0.9996703102896007, "No": 0.00032947733250487927}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.54043352081359e-08}, "ground_truth": 1}, {"key": "39011806", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.8610833119346506e-08}, "ground_truth": 0}, {"key": "39011806", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 3.952655576848419e-07}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00019834373442159276, "res": {"No": 0.9998013666902087, "Yes": 0.00019834373442159276}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00010733969850550501, "res": {"No": 0.9998922904870862, "Yes": 0.00010733969850550501}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.2029125277774544e-08}, "ground_truth": 1}, {"key": "33096163", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998692942427418, "res": {"Yes": 0.9998692942427418, "No": 0.00013029838088056157}, "ground_truth": 0}, {"key": "33096163", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0020179603609176678, "res": {"No": 0.9979816842440034, "Yes": 0.0020179603609176678}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9944276577169776, "res": {"Yes": 0.9944276577169776, "No": 0.005569934413875338}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.5662177727489512e-07}, "ground_truth": 1}, {"key": "38762205", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "yes": 6.345016178450665e-07}, "ground_truth": 0}, {"key": "38762205", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 1.6265684625060003e-06}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.009075679732348609, "res": {"No": 0.9909235201995718, "Yes": 0.009075679732348609}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999759659438225, "res": {"Yes": 0.9999759659438225, "No": 2.3512085321620924e-05}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 3.640251056944712e-07}, "ground_truth": 1}, {"key": "35519177", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.5570432462404417e-07}, "ground_truth": 0}, {"key": "35519177", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "yes": 4.207723019716549e-07}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9998625005539155, "res": {"Yes": 0.9998625005539155, "No": 0.00013584174357209383}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 1.6739867518439846e-06}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.455361557073739e-08}, "ground_truth": 1}, {"key": "36192531", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0400155354332737e-07}, "ground_truth": 0}, {"key": "36192531", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 3.6956876907688404e-07}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.8419403572472117e-06}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.212625266364036e-07}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.732194771743286e-08}, "ground_truth": 1}, {"key": "33160852", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.743659287361013e-08}, "ground_truth": 0}, {"key": "33160852", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999971839107652, "res": {"Yes": 0.9999971839107652, "No": 2.465280410855747e-06}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.278008070578868e-08}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.0944471973851174e-07}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 7.509589050348248e-08}, "ground_truth": 1}, {"key": "36312304", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.3956482225216735e-08}, "ground_truth": 0}, {"key": "36312304", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.0549871196218662e-06}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1209299473882866e-07}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.481957877130158e-08}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.216026877259272e-08}, "ground_truth": 1}, {"key": "33773343", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 7.999929618489833e-08}, "ground_truth": 0}, {"key": "33773343", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.170209379091555e-07}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8080429063116841, "res": {"Yes": 0.8080429063116841, "No": 0.19195630710752407}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4267593031874343e-07}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0383871072959094e-07}, "ground_truth": 1}, {"key": "34913320", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6075393421610326e-07}, "ground_truth": 0}, {"key": "34913320", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.673691144042681e-08}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.052211954338307415, "res": {"No": 0.9477875994911512, "Yes": 0.052211954338307415}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999900317366834, "res": {"Yes": 0.9999900317366834, "No": 8.780958392088893e-06}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.1414697791548084e-08}, "ground_truth": 1}, {"key": "33784155", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 4.1060465309402947e-07}, "ground_truth": 0}, {"key": "33784155", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.083656285557589e-08}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.40283863767887507, "res": {"No": 0.5971606239111518, "Yes": 0.40283863767887507}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.370262923151978e-08}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.354595476609562e-08}, "ground_truth": 1}, {"key": "24085062", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1869436607747181e-07}, "ground_truth": 0}, {"key": "24085062", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.146597913462181e-08}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.407739898701196e-08}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.1449415159906584e-06}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.3641410035867258e-08}, "ground_truth": 1}, {"key": "33893487", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.647659427501989e-08}, "ground_truth": 0}, {"key": "33893487", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 2.056944353335335e-06}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.726738886087338e-06, "res": {"No": 0.999994561441089, "Yes": 3.726738886087338e-06}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.3663916935676856e-05, "res": {"No": 0.9999851444463448, "Yes": 1.3663916935676856e-05}, "ground_truth": 0}, {"key": "40913011", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9991666578226204, "res": {"Yes": 0.9991666578226204, "No": 0.0008322518987881473}, "ground_truth": 1}, {"key": "40913011", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 4.791792051632808e-05, "res": {"No": 0.9999512916842885, "Yes": 4.791792051632808e-05}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0008573940014794439, "res": {"No": 0.9991422674030854, "Yes": 0.0008573940014794439}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.5349068898605776e-08}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.2541106119924395e-08}, "ground_truth": 1}, {"key": "29642545", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.602197930777385e-08}, "ground_truth": 0}, {"key": "29642545", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.080459671194583e-08}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9909752546942304, "res": {"Yes": 0.9909752546942304, "No": 0.009023440387011937}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999977799274644, "res": {"Yes": 0.9999977799274644, "No": 1.305035910426526e-06}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 2.7364245266098254e-07}, "ground_truth": 1}, {"key": "35969159", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997618033933174, "res": {"Yes": 0.9997618033933174, "No": 0.00023637666195507604}, "ground_truth": 0}, {"key": "35969159", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.07316702043823428, "res": {"No": 0.9268325279364891, "Yes": 0.07316702043823428}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0028763544575597867, "res": {"No": 0.9971230738064828, "Yes": 0.0028763544575597867}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.935663965276564e-07}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.332186598083959e-07}, "ground_truth": 1}, {"key": "37081669", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.224934441788775e-08}, "ground_truth": 0}, {"key": "37081669", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.6024913004882376e-08}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999974223173222, "res": {"Yes": 0.9999974223173222, "No": 2.0244834021525472e-06}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.968790653428237e-08}, "ground_truth": 1}, {"key": "40048022", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.1610898420574543e-07}, "ground_truth": 0}, {"key": "40048022", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 2.030466476356369e-06, "res": {"No": 0.9999971839107652, "Yes": 2.030466476356369e-06}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.550291460714826e-07}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 3.61595769819096e-08}, "ground_truth": 1}, {"key": "32884004", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.3593404936054092e-07}, "ground_truth": 0}, {"key": "32884004", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.665172382253645e-07}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0013006566846727292, "res": {"No": 0.9986979912498453, "Yes": 0.0013006566846727292}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.654859278811434e-07}, "ground_truth": 0}, {"key": "39022490", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.646443736967539e-07}, "ground_truth": 1}, {"key": "39022490", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, " Yes": 2.746921007978479e-07}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.197633851738593e-06, "res": {"No": 0.9999897933310884, "Yes": 8.197633851738593e-06}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.872002871347601e-07}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.1472922954313574e-07}, "ground_truth": 1}, {"key": "35159385", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.2633831228139834e-07}, "ground_truth": 0}, {"key": "35159385", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.006793150865870066, "res": {"No": 0.9932064614307361, "Yes": 0.006793150865870066}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.504107427684218e-07}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.1530260766645167e-07}, "ground_truth": 1}, {"key": "34363669", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.0060055192195865e-06}, "ground_truth": 0}, {"key": "34363669", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 8.108879136222491e-07, "res": {"No": 0.99999861435166, "Yes": 8.108879136222491e-07}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.679593918130602e-08}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.330015874370358e-08}, "ground_truth": 1}, {"key": "36119687", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999970647075079, "res": {"Yes": 0.9999970647075079, "No": 2.4706754084178368e-06}, "ground_truth": 0}, {"key": "36119687", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, " Yes": 3.7810477393946307e-07}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.195271193973554e-05, "res": {"No": 0.999967264321824, "Yes": 3.195271193973554e-05}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.2198900129892427e-07}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.280224598621968e-08}, "ground_truth": 1}, {"key": "35217446", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.239188402180472e-07}, "ground_truth": 0}, {"key": "35217446", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0114891500341747e-07}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.5573494959262446e-08}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.1953767040475682e-07}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.045368753266529e-08}, "ground_truth": 1}, {"key": "39049331", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.335727849933037e-08}, "ground_truth": 0}, {"key": "39049331", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.8753768671825172, "res": {"Yes": 0.8753768671825172, "No": 0.12462270077721388}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 9.012131831999807e-07}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 7.081803811973763e-07, "res": {"No": 0.9999989719621284, "Yes": 7.081803811973763e-07}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9997925474212627, "res": {"Yes": 0.9997925474212627, "No": 0.00020682200786121028}, "ground_truth": 1}, {"key": "36472242", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.2276172039534893e-06}, "ground_truth": 0}, {"key": "36472242", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 7.648783059266632e-06, "res": {"No": 0.9999919389784903, "Yes": 7.648783059266632e-06}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.282841523788684e-06, "res": {"No": 0.9999984951481323, "Yes": 1.282841523788684e-06}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.5043101251062753e-08}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.974070492694074e-07}, "ground_truth": 1}, {"key": "31854721", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.3378858590883476e-07}, "ground_truth": 0}, {"key": "31854721", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.538724148156807e-07}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.3432197870708564e-06, "res": {"No": 0.9999983759447187, "Yes": 1.3432197870708564e-06}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.6244676318859222e-07}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.072635795116594e-08}, "ground_truth": 1}, {"key": "18725849", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.485544284142498e-08}, "ground_truth": 0}, {"key": "18725849", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.275374879033646e-08}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.010135035463219332, "res": {"No": 0.9898644632836928, "Yes": 0.010135035463219332}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.3186792281684145e-07}, "ground_truth": 1}, {"key": "36883179", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.186977187431582e-08}, "ground_truth": 0}, {"key": "36883179", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.003030114110315057, "res": {"No": 0.9969692504222384, "Yes": 0.003030114110315057}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.3807763952032663e-07}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9901933875207561, "res": {"Yes": 0.9901933875207561, "No": 0.009805998389394983}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.3788277076749277e-07}, "ground_truth": 1}, {"key": "34266359", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "YES": 1.1120016181056584e-07}, "ground_truth": 0}, {"key": "34266359", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997683580531397, "res": {"Yes": 0.9997683580531397, "No": 0.0002306216659175621}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0954097179967536e-05, "res": {"No": 0.9999886013079656, "Yes": 1.0954097179967536e-05}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998142382258554, "res": {"Yes": 0.9998142382258554, "No": 0.00018527858512943862}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.4637684497870867e-08}, "ground_truth": 1}, {"key": "31920289", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.7326743469418626e-08}, "ground_truth": 0}, {"key": "31920289", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.8869215995895733e-08}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9988493683847897, "res": {"Yes": 0.9988493683847897, "No": 0.0011494959925752986}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.1793184076540186e-07}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.528907515948565e-08}, "ground_truth": 1}, {"key": "36292997", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.2832314823570456e-08}, "ground_truth": 0}, {"key": "36292997", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.002748133293934009, "res": {"No": 0.9972516878436837, "Yes": 0.002748133293934009}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.4250609326950294e-07}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.590086404097914e-08}, "ground_truth": 1}, {"key": "30412533", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.3785781624443638e-07}, "ground_truth": 0}, {"key": "30412533", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.7081393026314126e-07}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.401373990933972e-05, "res": {"No": 0.99996356913662, "Yes": 3.401373990933972e-05}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999968263007362, "res": {"Yes": 0.9999968263007362, "No": 1.662498444592997e-06}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.0988471205488725e-06}, "ground_truth": 1}, {"key": "40433191", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999764427474764, "res": {"Yes": 0.9999764427474764, "No": 2.2431754306164028e-05}, "ground_truth": 0}, {"key": "40433191", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999973031140366, "res": {"Yes": 0.9999973031140366, "No": 1.910332972400303e-06}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999708403221517, "res": {"Yes": 0.9999708403221517, "No": 2.8957885720520342e-05}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 8.804409283650468e-07}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.9850854062802884e-08}, "ground_truth": 1}, {"key": "34565591", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.912642081940016e-08}, "ground_truth": 0}, {"key": "34565591", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 6.596562308304335e-07}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 9.895445076321373e-06, "res": {"No": 0.9999887205106139, "Yes": 9.895445076321373e-06}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.007627106746713873, "res": {"No": 0.992371743563359, "Yes": 0.007627106746713873}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.609100826241118e-07}, "ground_truth": 1}, {"key": "36062480", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.2290083822687504e-07}, "ground_truth": 0}, {"key": "36062480", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.003338809408710564, "res": {"No": 0.9966603334337517, "Yes": 0.003338809408710564}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.005480010291571389, "res": {"No": 0.9945197456833471, "Yes": 0.005480010291571389}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999949190499081, "res": {"Yes": 0.9999949190499081, "No": 4.510976089679356e-06}, "ground_truth": 0}, {"key": "37276883", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.108331038661317e-08}, "ground_truth": 1}, {"key": "37276883", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.701793325985301e-08}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, " Yes": 1.256213014199139e-07}, "ground_truth": 0}, {"key": "38509260", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.656346436050811e-08}, "ground_truth": 1}, {"key": "38509260", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.7590196748819334e-07}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.0142490881392827e-05, "res": {"No": 0.999979303571174, "Yes": 2.0142490881392827e-05}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999848913294746, "res": {"Yes": 0.999848913294746, "No": 0.00015062624322160847}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1646803258579253e-07}, "ground_truth": 1}, {"key": "37139607", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.462342592304197e-08}, "ground_truth": 0}, {"key": "37139607", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.1146133327142541e-06, "res": {"No": 0.9999983759447187, "Yes": 1.1146133327142541e-06}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.909721763647754e-06, "res": {"No": 0.9999973031140366, "Yes": 1.909721763647754e-06}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.8073743484548565e-07}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.9880193523878673e-08}, "ground_truth": 1}, {"key": "37092824", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.98380392985111e-08}, "ground_truth": 0}, {"key": "37092824", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.543952776768176e-08}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 9.202274216554544e-07, "res": {"No": 0.9999981375378344, "Yes": 9.202274216554544e-07}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.0557064454822748e-06}, "ground_truth": 1}, {"key": "32191802", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999771579518836, "res": {"Yes": 0.9999771579518836, "No": 2.2358768214316164e-05}, "ground_truth": 0}, {"key": "32191802", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.081836662339357e-08}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.169344861247324e-07, "res": {"No": 0.9999988527586581, "Yes": 6.169344861247324e-07}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.861442014979395e-07}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.961196313267885e-07}, "ground_truth": 1}, {"key": "39396038", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1767655916397621e-07}, "ground_truth": 0}, {"key": "39396038", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999947998470209, "res": {"Yes": 0.9999947998470209, "No": 4.481203093202439e-06}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.356917350911323e-06, "res": {"No": 0.9999955150656573, "Yes": 2.356917350911323e-06}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "yes": 6.073767882484635e-07}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.6071447342476265e-07}, "ground_truth": 1}, {"key": "39076884", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.524987179790307e-08}, "ground_truth": 0}, {"key": "39076884", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9977213810026287, "res": {"Yes": 0.9977213810026287, "No": 0.0022770876477443713}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9997954077218552, "res": {"Yes": 0.9997954077218552, "No": 0.00020408969079147469}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "yes": 3.202538097709646e-07}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.859666812002338e-08}, "ground_truth": 1}, {"key": "27763432", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.745453645458537e-08}, "ground_truth": 0}, {"key": "27763432", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.1873579764062009e-07}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.995982289681408, "res": {"Yes": 0.995982289681408, "No": 0.004017333117932732}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0889147440840161, "res": {"No": 0.9110844187400373, "Yes": 0.0889147440840161}, "ground_truth": 1}, {"key": "37806929", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 1.082080479491159e-06}, "ground_truth": 0}, {"key": "37806929", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.761226705392216e-06}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999862172649827, "res": {"Yes": 0.9999862172649827, "No": 1.2232588242920441e-05}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "yes": 4.855172594506762e-07}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.117058406433518e-08}, "ground_truth": 1}, {"key": "32334186", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.184242538607526e-08}, "ground_truth": 0}, {"key": "32334186", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9932267907872819, "res": {"Yes": 0.9932267907872819, "No": 0.0067721003259677195}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996643557411417, "res": {"Yes": 0.9996643557411417, "No": 0.00033487206319244845}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 6.194096437659607e-07}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999956342685299, "res": {"Yes": 0.9999956342685299, "No": 3.988317879656622e-06}, "ground_truth": 1}, {"key": "36187324", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "yes": 9.954370002252112e-07}, "ground_truth": 0}, {"key": "36187324", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.792379471402427e-07}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.00013398214676321735, "res": {"No": 0.9998635732369773, "Yes": 0.00013398214676321735}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 6.751796725083755e-07}, "ground_truth": 0}, {"key": "35306009", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.8723073700207139e-06, "res": {"No": 0.9999961110815618, "Yes": 1.8723073700207139e-06}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999734627301196, "res": {"Yes": 0.9999734627301196, "No": 2.6166724804983146e-05}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 1.0523876741321408e-06}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999595163694843, "res": {"Yes": 0.9999595163694843, "No": 4.0175869651149874e-05}, "ground_truth": 1}, {"key": "39490050", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.3213681154581322e-07}, "ground_truth": 0}, {"key": "39490050", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.9148813279209286e-07}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.2025286618887868e-06}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999107652316588, "res": {"Yes": 0.9999107652316588, "No": 8.85994418530551e-05}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.8827849519116014e-08}, "ground_truth": 1}, {"key": "38072149", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0513947867039667e-07}, "ground_truth": 0}, {"key": "38072149", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "No": 1.6449657411704578e-06}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.65530646374226e-08}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.9649961796078928e-08}, "ground_truth": 1}, {"key": "35899689", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.981295518424899e-08}, "ground_truth": 0}, {"key": "35899689", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.024160764479468e-08}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0897679294979974, "res": {"No": 0.9102315511731441, "Yes": 0.0897679294979974}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.3386448810510523e-07}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.4035642598747764e-08}, "ground_truth": 1}, {"key": "27994518", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.091677252944027e-08}, "ground_truth": 0}, {"key": "27994518", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.315771154010566e-08}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.03755353740559889, "res": {"No": 0.9624453564031277, "Yes": 0.03755353740559889}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.031680475584867425, "res": {"No": 0.9683188843238361, "Yes": 0.031680475584867425}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 5.273871120980301e-07}, "ground_truth": 1}, {"key": "10615479", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0420872269982168e-07}, "ground_truth": 0}, {"key": "10615479", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.02563781676823245, "res": {"No": 0.9743619329102329, "Yes": 0.02563781676823245}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.012189668869107911, "res": {"No": 0.9878098127310693, "Yes": 0.012189668869107911}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0304005920580846e-07}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 6.361458938887649e-08}, "ground_truth": 1}, {"key": "40186667", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999592779711644, "res": {"Yes": 0.9999592779711644, "No": 3.959645357108404e-05}, "ground_truth": 0}, {"key": "40186667", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9882679712972812, "res": {"Yes": 0.9882679712972812, "No": 0.01173139731761552}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.444293946516079e-08}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.27950581193739e-08}, "ground_truth": 1}, {"key": "38622886", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.334205674358872e-07}, "ground_truth": 0}, {"key": "38622886", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.963687950786214e-08}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.995318057534499e-05, "res": {"No": 0.9999795419732683, "Yes": 1.995318057534499e-05}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 6.946635662047886e-05, "res": {"No": 0.9999300746848888, "Yes": 6.946635662047886e-05}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 1.811899502619819e-08}, "ground_truth": 1}, {"key": "40686943", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.428225500664464e-08}, "ground_truth": 0}, {"key": "40686943", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "\"Yes": 3.848951386092762e-08}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999480733442354, "res": {"Yes": 0.9999480733442354, "No": 5.1693810662971404e-05}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.697657564540569e-08}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.65904932735571e-08}, "ground_truth": 1}, {"key": "30604567", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999938462231346, "res": {"Yes": 0.9999938462231346, "No": 5.955347515099547e-06}, "ground_truth": 0}, {"key": "30604567", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.7203614131217288e-08}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.151498704705093e-08}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.2743683587543872e-07}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.7937881183683574e-07}, "ground_truth": 1}, {"key": "35440903", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.1087759295187034e-07}, "ground_truth": 0}, {"key": "35440903", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.004942280572300648, "res": {"No": 0.9950575761701992, "Yes": 0.004942280572300648}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 7.705338998164001e-05, "res": {"No": 0.9999222078259681, "Yes": 7.705338998164001e-05}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.6914501860159973e-07}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.999979422771714, "res": {"Yes": 0.999979422771714, "No": 1.9645288880942377e-05}, "ground_truth": 1}, {"key": "37219533", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.17488520355380185, "res": {"No": 0.825113860040464, "Yes": 0.17488520355380185}, "ground_truth": 0}, {"key": "37219533", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9984111824091696, "res": {"Yes": 0.9984111824091696, "No": 0.0015883000271967233}, "ground_truth": 0}, {"key": "40178965", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4452259136322321e-07}, "ground_truth": 1}, {"key": "40178965", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998349723485612, "res": {"Yes": 0.9998349723485612, "No": 0.00016356239821082054}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 3.172086746459228e-07, "res": {"No": 0.9999993295729247, "Yes": 3.172086746459228e-07}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 9.090268675108409e-05, "res": {"No": 0.999908619765428, "Yes": 9.090268675108409e-05}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.785146992899775e-08}, "ground_truth": 1}, {"key": "13750468", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999981375378344, "res": {"Yes": 0.9999981375378344, "No": 1.6964088201934433e-06}, "ground_truth": 0}, {"key": "13750468", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.810596744160447e-08}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.22634556473787776, "res": {"No": 0.7736534381441799, "Yes": 0.22634556473787776}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.0185926085317244e-08}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.076082106638187e-08}, "ground_truth": 1}, {"key": "17754949", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 8.407395573782086e-08}, "ground_truth": 0}, {"key": "17754949", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "yes": 2.981172414884039e-07}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.001107094791216194, "res": {"No": 0.9988915939362947, "Yes": 0.001107094791216194}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 5.419819243405388e-05, "res": {"No": 0.9999446166237158, "Yes": 5.419819243405388e-05}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.0016310467375344943, "res": {"No": 0.9983685231077084, "Yes": 0.0016310467375344943}, "ground_truth": 1}, {"key": "36675623", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.007043061719620888, "res": {"No": 0.9929555661675771, "Yes": 0.007043061719620888}, "ground_truth": 0}, {"key": "36675623", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00022792202115215325, "res": {"No": 0.9997712182745312, "Yes": 0.00022792202115215325}, "ground_truth": 0}, {"key": "40035440", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9992221253514705, "res": {"Yes": 0.9992221253514705, "No": 0.0007774134021387008}, "ground_truth": 1}, {"key": "40035440", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9997957652387589, "res": {"Yes": 0.9997957652387589, "No": 0.0002032182567673237}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 8.857899422031894e-07}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.123155781356396e-06, "res": {"No": 0.9999958726752174, "Yes": 2.123155781356396e-06}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.815278807396222e-08}, "ground_truth": 1}, {"key": "37685909", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.246672378646869e-08}, "ground_truth": 0}, {"key": "37685909", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.799912765777593e-08}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00014634217704443116, "res": {"No": 0.9998530848030946, "Yes": 0.00014634217704443116}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.553623820604529e-07}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.563316193544012e-08}, "ground_truth": 1}, {"key": "36938787", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.577972956863698e-08}, "ground_truth": 0}, {"key": "36938787", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.174881820512558e-08}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.2043459653366354e-08}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0022200024044787482, "res": {"No": 0.9977797694660498, "Yes": 0.0022200024044787482}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.927239430034699e-08}, "ground_truth": 1}, {"key": "39398068", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.99575637008532e-08}, "ground_truth": 0}, {"key": "39398068", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.6583520299611516e-08}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.050159594544151e-07, "res": {"No": 0.9999989719621284, "Yes": 6.050159594544151e-07}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.613401555980695e-07}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.383134061912148e-08}, "ground_truth": 1}, {"key": "39926408", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.230588209249531e-08}, "ground_truth": 0}, {"key": "39926408", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.9636474132137984e-08}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9963977599898108, "res": {"Yes": 0.9963977599898108, "No": 0.0036016961201056144}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999932502087799, "res": {"Yes": 0.9999932502087799, "No": 6.558340234860779e-06}, "ground_truth": 1}, {"key": "40465336", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.289773587277072e-08}, "ground_truth": 0}, {"key": "40465336", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.4794871487979787e-08}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0074101373802588745, "res": {"No": 0.9925896917325902, "Yes": 0.0074101373802588745}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.932318942537698e-06, "res": {"No": 0.9999967070975216, "Yes": 2.932318942537698e-06}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.411962599280163e-07}, "ground_truth": 1}, {"key": "34173549", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.251370651548239e-08}, "ground_truth": 0}, {"key": "34173549", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.6904261399368163e-07}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999467621731833, "res": {"Yes": 0.9999467621731833, "No": 5.2527590136385235e-05}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9712104955013191, "res": {"Yes": 0.9712104955013191, "No": 0.028788421613036762}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999829988145218, "res": {"Yes": 0.9999829988145218, "No": 1.66260552458853e-05}, "ground_truth": 1}, {"key": "33541535", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.169866513129353e-07}, "ground_truth": 0}, {"key": "33541535", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 9.608381052906414e-08}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.32943408189504103, "res": {"No": 0.6705640869451472, "Yes": 0.32943408189504103}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999983759447187, "res": {"Yes": 0.9999983759447187, "yes": 8.943407341412242e-07}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.756729420687566e-08}, "ground_truth": 1}, {"key": "35685195", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0181127322623933e-07}, "ground_truth": 0}, {"key": "35685195", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.608970689030432e-07}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9998647651150615, "res": {"Yes": 0.9998647651150615, "No": 0.0001345065044323326}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.9794039102529673e-08}, "ground_truth": 1}, {"key": "28440730", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.0368694284926367e-08}, "ground_truth": 0}, {"key": "28440730", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.012061755951754e-08}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0783503988325394e-05, "res": {"No": 0.999988005296937, "Yes": 1.0783503988325394e-05}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "No": 1.705560411714695e-08}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 1.3497693742293293e-08}, "ground_truth": 1}, {"key": "38338714", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 5.95203241718949e-08}, "ground_truth": 0}, {"key": "38338714", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.00018288189426581285, "res": {"No": 0.9998168602121388, "Yes": 0.00018288189426581285}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.030091197254974344, "res": {"No": 0.9699083805373088, "Yes": 0.030091197254974344}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 6.040795151965407e-07}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.618804363887564e-08}, "ground_truth": 1}, {"key": "32191881", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.707469433739972e-08}, "ground_truth": 0}, {"key": "32191881", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.12738740169697e-08}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 9.97645241773371e-06, "res": {"No": 0.9999897933310884, "Yes": 9.97645241773371e-06}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.27334117973748e-08}, "ground_truth": 1}, {"key": "37707251", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.1009414208911879e-07}, "ground_truth": 0}, {"key": "37707251", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.08807464163727e-08}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9968034041266044, "res": {"Yes": 0.9968034041266044, "No": 0.003194930321492487}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99998752848777, "res": {"Yes": 0.99998752848777, "No": 1.1900037578436132e-05}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.251820319085743e-08}, "ground_truth": 1}, {"key": "40172567", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.0000855322443525e-08}, "ground_truth": 0}, {"key": "40172567", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.78149568014158e-08}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.002228297807512871, "res": {"No": 0.9977712278093387, "Yes": 0.002228297807512871}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.715471202346735e-08}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.052715669159088e-08}, "ground_truth": 1}, {"key": "33113255", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1284428245095635e-07}, "ground_truth": 0}, {"key": "33113255", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 4.39975849077834e-07}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.17574227565695727, "res": {"No": 0.8242568058853201, "Yes": 0.17574227565695727}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999995276659155, "res": {"Yes": 0.999995276659155, "No": 3.370268982566106e-06}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.87140110613367e-08}, "ground_truth": 1}, {"key": "33022143", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.4899188434016977e-07}, "ground_truth": 0}, {"key": "33022143", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 5.26849978346926e-07}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0027512663015446066, "res": {"No": 0.997246943928688, "Yes": 0.0027512663015446066}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.148119162126723e-07}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 9.238067732746518e-08}, "ground_truth": 1}, {"key": "32084473", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0651670602652426e-07}, "ground_truth": 0}, {"key": "32084473", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.2890566231482687e-07}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.517301858920082e-05, "res": {"No": 0.9999338889494318, "Yes": 6.517301858920082e-05}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 6.494402492913743e-07}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.449163498094303e-08}, "ground_truth": 1}, {"key": "40564245", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.362322073755201e-07}, "ground_truth": 0}, {"key": "40564245", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, " Yes": 3.900066682078663e-07}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.8803624634470743e-06, "res": {"No": 0.9999977799274644, "Yes": 1.8803624634470743e-06}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9601696013992612, "res": {"Yes": 0.9601696013992612, "No": 0.039829927580705145}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.306237196002642e-08}, "ground_truth": 1}, {"key": "31717213", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.503502699308127e-08}, "ground_truth": 0}, {"key": "31717213", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.017436563670078e-07}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9996144457451517, "res": {"Yes": 0.9996144457451517, "No": 0.0003849809905847601}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 4.242690631822862e-07}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.3437673856366172e-07}, "ground_truth": 1}, {"key": "34861894", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "\"Yes": 8.40805978426838e-08}, "ground_truth": 0}, {"key": "34861894", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 5.086360564224887e-06, "res": {"No": 0.9999943230348141, "Yes": 5.086360564224887e-06}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.07684585858805376, "res": {"No": 0.9231520162913983, "Yes": 0.07684585858805376}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.9995949818122201e-07}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "\"Yes": 2.1957477540633948e-07}, "ground_truth": 1}, {"key": "40838760", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.4281210810274212e-07}, "ground_truth": 0}, {"key": "40838760", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 3.3438293123370835e-07}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.99999861435166, "res": {"Yes": 0.99999861435166, "No": 7.235291510352267e-07}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.83605454260409e-08}, "ground_truth": 1}, {"key": "40044849", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.576585856936734e-08}, "ground_truth": 0}, {"key": "40044849", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.7503707995680416e-07}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.036824606587128036, "res": {"No": 0.9631748323246573, "Yes": 0.036824606587128036}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 3.03455457495532e-07}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.5032668665908578e-07}, "ground_truth": 1}, {"key": "30296116", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.805459360872681e-08}, "ground_truth": 0}, {"key": "30296116", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "No": 1.4258236560103695e-07}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 6.853542356604625e-07}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.767679467220691e-07}, "ground_truth": 1}, {"key": "34931360", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.3298759283912838e-07}, "ground_truth": 0}, {"key": "34931360", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.0008615211087284058, "res": {"No": 0.9991379833999612, "Yes": 0.0008615211087284058}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999449742139741, "res": {"Yes": 0.9999449742139741, "No": 5.446817346462173e-05}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.1472475640070977e-07}, "ground_truth": 1}, {"key": "18862422", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.719322180638873e-07}, "ground_truth": 0}, {"key": "18862422", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 3.048995374524772e-07}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "No": 3.835846539812512e-07}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 9.185209781340268e-06, "res": {"No": 0.9999903893441826, "Yes": 9.185209781340268e-06}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.184785387936146e-07}, "ground_truth": 1}, {"key": "36361140", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "No": 5.538366772893337e-07}, "ground_truth": 0}, {"key": "36361140", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999804955832136, "res": {"Yes": 0.9999804955832136, "No": 1.9179472336860705e-05}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9949990332818962, "res": {"Yes": 0.9949990332818962, "No": 0.005000321843098038}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.579588121567418e-08}, "ground_truth": 1}, {"key": "39703329", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999615427524989, "res": {"Yes": 0.9999615427524989, "No": 3.728341381681191e-05}, "ground_truth": 0}, {"key": "39703329", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.425971186462087e-08}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00010080203641513234, "res": {"No": 0.9998982500468776, "Yes": 0.00010080203641513234}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9365003671998432, "res": {"Yes": 0.9365003671998432, "No": 0.06349886800089173}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.4760088892643434, "res": {"No": 0.5239894387088473, "Yes": 0.4760088892643434}, "ground_truth": 1}, {"key": "34033324", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, "yes": 5.388724154965543e-07}, "ground_truth": 0}, {"key": "34033324", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999976607241361, "res": {"Yes": 0.9999976607241361, "No": 2.0051213005666467e-06}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0812291166215855e-05, "res": {"No": 0.9999872900832717, "Yes": 1.0812291166215855e-05}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.674695791181757e-07}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 2.273629949415205e-07}, "ground_truth": 1}, {"key": "35658862", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999980183344636, "res": {"Yes": 0.9999980183344636, "yes": 8.329685088466495e-07}, "ground_truth": 0}, {"key": "35658862", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.0008660257422476e-07}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00017000376059697573, "res": {"No": 0.9998293707088336, "Yes": 0.00017000376059697573}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 5.1660101061765e-07}, "ground_truth": 0}, {"key": "36092657", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999596355646657, "res": {"Yes": 0.9999596355646657, "No": 3.804412072689582e-05}, "ground_truth": 1}, {"key": "36092657", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999933694113825, "res": {"Yes": 0.9999933694113825, "No": 3.1345717377163174e-06}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.5286134122056187e-06}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0019114703812078274, "res": {"No": 0.9980884254902688, "Yes": 0.0019114703812078274}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.764634094671842e-08}, "ground_truth": 1}, {"key": "26333438", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.050567637080159e-07}, "ground_truth": 0}, {"key": "26333438", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.999844149545944, "res": {"Yes": 0.999844149545944, "No": 0.00015530649599469152}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9907759311515059, "res": {"Yes": 0.9907759311515059, "No": 0.009223103434021458}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.112152920080953e-08}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.138066889775487e-08}, "ground_truth": 1}, {"key": "34184963", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.1701543356923073e-07}, "ground_truth": 0}, {"key": "34184963", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.103177788861791e-08}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.139022742062878e-07}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.29806413425842376, "res": {"No": 0.7019356821659479, "Yes": 0.29806413425842376}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.712830801309999e-08}, "ground_truth": 1}, {"key": "35069975", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.6021070208786507e-07}, "ground_truth": 0}, {"key": "35069975", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.7439033191291481, "res": {"Yes": 0.7439033191291481, "No": 0.2560955402562336}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.00244065093162273, "res": {"No": 0.997558484758526, "Yes": 0.00244065093162273}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 2.8144941797391557e-06, "res": {"No": 0.999996945503965, "Yes": 2.8144941797391557e-06}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.095946163411099e-08}, "ground_truth": 1}, {"key": "36443950", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 3.742390771666673e-07}, "ground_truth": 0}, {"key": "36443950", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9967737904344911, "res": {"Yes": 0.9967737904344911, "No": 0.0032249430106719825}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.3071215962948668e-05, "res": {"No": 0.9999866940725246, "Yes": 1.3071215962948668e-05}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9653343287321388, "res": {"Yes": 0.9653343287321388, "No": 0.03466539679823303}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.349890811375595e-08}, "ground_truth": 1}, {"key": "29460858", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.01257427539452e-08}, "ground_truth": 0}, {"key": "29460858", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "No": 7.316643934545153e-08}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999415175041678, "res": {"Yes": 0.9999415175041678, "No": 5.7918251415039365e-05}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999999091165773, "res": {"Yes": 0.999999091165773, "No": 5.839724674540474e-07}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.565658480962546e-08}, "ground_truth": 1}, {"key": "36155704", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.942981758522609e-08}, "ground_truth": 0}, {"key": "36155704", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.2959015764621276e-07}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.922436640389849e-05, "res": {"No": 0.9999776347571058, "Yes": 1.922436640389849e-05}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9788710775686484, "res": {"Yes": 0.9788710775686484, "No": 0.02112760921985775}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "No": 7.556171533113794e-07}, "ground_truth": 1}, {"key": "37185211", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.5293176430033186e-07}, "ground_truth": 0}, {"key": "37185211", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.8660943259468887e-07}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9872713078644232, "res": {"Yes": 0.9872713078644232, "No": 0.012727826746964144}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9992622336834982, "res": {"Yes": 0.9992622336834982, "No": 0.0007352796903842597}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.3634689502608783e-07}, "ground_truth": 1}, {"key": "36454885", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.1896564760597134e-08}, "ground_truth": 0}, {"key": "36454885", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 2.995432567310566e-07}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 2.3778926421196572e-07}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.503172163761432e-07}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0093844109813971e-07}, "ground_truth": 1}, {"key": "33148906", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 9.409574586857739e-08}, "ground_truth": 0}, {"key": "33148906", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.774182823391241e-08}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.618995212856689, "res": {"Yes": 0.618995212856689, "No": 0.3810043448936954}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.646664934799019e-08}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.665320366570592e-08}, "ground_truth": 1}, {"key": "18086604", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9998970581288781, "res": {"Yes": 0.9998970581288781, "No": 0.00010231073029889868}, "ground_truth": 0}, {"key": "18086604", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.6007842268685402e-07}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 2.049831234178005e-05, "res": {"No": 0.9999791843696483, "Yes": 2.049831234178005e-05}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.007340348191109327, "res": {"No": 0.9926593451876464, "Yes": 0.007340348191109327}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999956342685299, "res": {"Yes": 0.9999956342685299, "No": 3.907424877670052e-06}, "ground_truth": 1}, {"key": "33693397", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 8.002999948663311e-05, "res": {"No": 0.999919585553415, "Yes": 8.002999948663311e-05}, "ground_truth": 0}, {"key": "33693397", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 7.427538843027229e-06, "res": {"No": 0.9999920581810364, "Yes": 7.427538843027229e-06}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0019862993392879323, "res": {"No": 0.9980120807340495, "Yes": 0.0019862993392879323}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.03054049411651021, "res": {"No": 0.9694578463772391, "Yes": 0.03054049411651021}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.5212872537533026e-08}, "ground_truth": 1}, {"key": "39501530", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.262282554378053e-08}, "ground_truth": 0}, {"key": "39501530", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.864984243006212e-08}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.8600809639826461, "res": {"Yes": 0.8600809639826461, "No": 0.13991834171601572}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9993757994942419, "res": {"Yes": 0.9993757994942419, "No": 0.0006235703303046513}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.050874118194424e-08}, "ground_truth": 1}, {"key": "30948874", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.5662008296267086e-08}, "ground_truth": 0}, {"key": "30948874", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.778447311787841e-08}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0001872112311988689, "res": {"No": 0.9998114970389163, "Yes": 0.0001872112311988689}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 1.9114796938686854e-07}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.518508225033936e-08}, "ground_truth": 1}, {"key": "39410675", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999975415208221, "res": {"Yes": 0.9999975415208221, "No": 8.693700222066057e-07}, "ground_truth": 0}, {"key": "39410675", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4595857330316953e-07}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.683578766191717e-08}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.041822559065899e-08}, "ground_truth": 1}, {"key": "32903337", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 6.334397172252205e-07}, "ground_truth": 0}, {"key": "32903337", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 2.262837713205885e-07}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9996701911189102, "res": {"Yes": 0.9996701911189102, "No": 0.0003289578272716165}, "ground_truth": 1}, {"key": "27685132", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.5180180337212816e-08}, "ground_truth": 0}, {"key": "27685132", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.2514447087858065e-07}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0012493285899660774, "res": {"No": 0.9987505462181749, "Yes": 0.0012493285899660774}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.164118370733053e-08}, "ground_truth": 1}, {"key": "22791471", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.7106021456893365e-08}, "ground_truth": 0}, {"key": "22791471", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.2879456955718495e-05, "res": {"No": 0.999985978860297, "Yes": 1.2879456955718495e-05}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.4264825387296358e-07}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.9157175949658474e-08}, "ground_truth": 1}, {"key": "32292348", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.731814654594964e-08}, "ground_truth": 0}, {"key": "32292348", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999011106398752, "res": {"Yes": 0.9999011106398752, "No": 9.811316505894816e-05}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0019036332156680727, "res": {"No": 0.9980960290566653, "Yes": 0.0019036332156680727}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.646623115002786e-08}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.2832085882658953e-07}, "ground_truth": 1}, {"key": "20482930", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.68386093535153e-08}, "ground_truth": 0}, {"key": "20482930", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.8920514940864586e-08}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 7.702467591519675e-08}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.326161148879786e-08}, "ground_truth": 1}, {"key": "11635754", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.272126633758837e-08}, "ground_truth": 0}, {"key": "11635754", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.577946756833257e-08}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0005782410700690419, "res": {"No": 0.9994212786238518, "Yes": 0.0005782410700690419}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999987335551019, "res": {"Yes": 0.9999987335551019, "yes": 6.376253192610343e-07}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 6.20937808726676e-08}, "ground_truth": 1}, {"key": "40029096", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.301406701937992e-07}, "ground_truth": 0}, {"key": "40029096", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.284718107317406e-08}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.5721275631305077, "res": {"Yes": 0.5721275631305077, "No": 0.4278716121555509}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9983320392952012, "res": {"Yes": 0.9983320392952012, "No": 0.0016672268528150438}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "No": 6.598561369526236e-07}, "ground_truth": 1}, {"key": "40414719", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "No": 2.311092025080494e-07}, "ground_truth": 0}, {"key": "40414719", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.1740456441807325e-08}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 4.5919586351796346e-07}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.3635322009863056, "res": {"No": 0.6364655096262543, "Yes": 0.3635322009863056}, "ground_truth": 1}, {"key": "39537616", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "\"Yes": 1.0543491434995812e-07}, "ground_truth": 0}, {"key": "39537616", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.1532447749024889, "res": {"No": 0.8467537911489308, "Yes": 0.1532447749024889}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999989719621284, "res": {"Yes": 0.9999989719621284, " Yes": 3.17392709042033e-07}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.2987868688240223e-07}, "ground_truth": 1}, {"key": "33245830", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.8559369278707504e-07}, "ground_truth": 0}, {"key": "33245830", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 2.037739214172554e-05, "res": {"No": 0.9999756083404814, "Yes": 2.037739214172554e-05}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999982567412194, "res": {"Yes": 0.9999982567412194, "No": 1.4812666375867108e-06}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 5.334503116965193e-08}, "ground_truth": 1}, {"key": "39243601", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 6.724010277009772e-08}, "ground_truth": 0}, {"key": "39243601", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.8588559130205975e-08}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.9260698083422236e-06, "res": {"No": 0.9999975415208221, "Yes": 1.9260698083422236e-06}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999824028078323, "res": {"Yes": 0.9999824028078323, "No": 1.7293376345091225e-05}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 7.199501186378182e-08}, "ground_truth": 1}, {"key": "35815905", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 1.960741550836123e-08}, "ground_truth": 0}, {"key": "35815905", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.9550750040889157e-06}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9744442124800454, "res": {"Yes": 0.9744442124800454, "No": 0.025552983451767427}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0916732936920887e-07}, "ground_truth": 1}, {"key": "35260212", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999807339855921, "res": {"Yes": 0.9999807339855921, "No": 1.8312493978612694e-05}, "ground_truth": 0}, {"key": "35260212", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 4.061984514873632e-07}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.7182459345405163e-05, "res": {"No": 0.9999820452021894, "Yes": 1.7182459345405163e-05}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 2.0327014073922482e-07}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.174667623318012e-08}, "ground_truth": 1}, {"key": "39193924", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.0706893045915304e-07}, "ground_truth": 0}, {"key": "39193924", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 9.74070740582959e-08}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 6.498234716660484e-06, "res": {"No": 0.9999928926002577, "Yes": 6.498234716660484e-06}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.603334468512567e-08}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.191987669742261e-08}, "ground_truth": 1}, {"key": "40658569", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.502015842483087e-08}, "ground_truth": 0}, {"key": "40658569", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.774088124001117e-06}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9949799139621675, "res": {"Yes": 0.9949799139621675, "No": 0.00501968046346818}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.999994561441089, "res": {"Yes": 0.999994561441089, "No": 4.8962693498535785e-06}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999978991308068, "res": {"Yes": 0.9999978991308068, "No": 1.515127445649683e-06}, "ground_truth": 1}, {"key": "33497596", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 7.536717511683098e-08}, "ground_truth": 0}, {"key": "33497596", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999963494876631, "res": {"Yes": 0.9999963494876631, "No": 3.1788088716629554e-06}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.7667387487836778, "res": {"Yes": 0.7667387487836778, "No": 0.23325943264687835}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.0509501407584369e-07}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 8.848250394620241e-08}, "ground_truth": 1}, {"key": "40339241", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9997195153831671, "res": {"Yes": 0.9997195153831671, "No": 0.00027950678225295556}, "ground_truth": 0}, {"key": "40339241", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 1.70897307803575e-07}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999357960811598, "res": {"Yes": 0.9999357960811598, "No": 6.362201498185391e-05}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.788373269105936e-08}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 4.128659359365012e-08}, "ground_truth": 1}, {"key": "31792608", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.3598233014142858e-08}, "ground_truth": 0}, {"key": "31792608", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.2083787394208974e-08}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.893870865428435e-08}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999871708812939, "res": {"Yes": 0.9999871708812939, "No": 1.231944068295828e-05}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.2229006894041177e-07}, "ground_truth": 1}, {"key": "33132662", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, " Yes": 1.0095227061187008e-07}, "ground_truth": 0}, {"key": "33132662", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.07758612262014326, "res": {"No": 0.9224135317324255, "Yes": 0.07758612262014326}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 1.8712088221122145e-07}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999996871837189, "res": {"Yes": 0.9999996871837189, "yes": 6.28006759496955e-08}, "ground_truth": 1}, {"key": "37577457", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999953958625991, "res": {"Yes": 0.9999953958625991, "No": 3.6267812900199737e-06}, "ground_truth": 0}, {"key": "37577457", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, " Yes": 3.4694381032872937e-07}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 1.6036198257244396e-05, "res": {"No": 0.9999837140256179, "Yes": 1.6036198257244396e-05}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.40349604219771645, "res": {"No": 0.5965036788917899, "Yes": 0.40349604219771645}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.3060256035931206e-08}, "ground_truth": 1}, {"key": "38701278", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.812946805399432e-08}, "ground_truth": 0}, {"key": "38701278", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 3.8907959222072475e-08}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 8.938354081047585e-05, "res": {"No": 0.999909692497968, "Yes": 8.938354081047585e-05}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999937270200753, "res": {"Yes": 0.9999937270200753, "No": 5.167601001792402e-06}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "yes": 1.8973167495718432e-07}, "ground_truth": 1}, {"key": "34570783", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999984951481323, "res": {"Yes": 0.9999984951481323, "No": 1.0722385415145933e-06}, "ground_truth": 0}, {"key": "34570783", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9993334160258753, "res": {"Yes": 0.9993334160258753, "No": 0.0006657304493867277}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9987273637891741, "res": {"Yes": 0.9987273637891741, "No": 0.0012715312450832126}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.5014760400305553e-07}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999994487765019, "res": {"Yes": 0.9999994487765019, "yes": 3.2557336792677835e-07}, "ground_truth": 1}, {"key": "39064526", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 9.078006780579023e-07, "res": {"No": 0.9999981375378344, "Yes": 9.078006780579023e-07}, "ground_truth": 0}, {"key": "39064526", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999992103693117, "res": {"Yes": 0.9999992103693117, "yes": 4.5122487839311874e-07}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999860980626328, "res": {"Yes": 0.9999860980626328, "No": 1.2973289085449935e-05}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.1712567493072734e-08}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 8.097818820147558e-08}, "ground_truth": 1}, {"key": "40741545", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, " Yes": 1.3978965618179781e-07}, "ground_truth": 0}, {"key": "40741545", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999870516788303, "res": {"Yes": 0.9999870516788303, "No": 1.254431478755412e-05}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.0018227363827918554, "res": {"No": 0.9981770171554102, "Yes": 0.0018227363827918554}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.1456035900113831e-07}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 3.160119185924812e-08}, "ground_truth": 1}, {"key": "36929751", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, "yes": 2.5179189426801635e-08}, "ground_truth": 0}, {"key": "36929751", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 5.663860596860729e-08}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.01922864278260244, "res": {"No": 0.9807711460732582, "Yes": 0.01922864278260244}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.1023161193826667e-07}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 1.016866307756252e-07}, "ground_truth": 1}, {"key": "23984730", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999998063873687, "res": {"Yes": 0.9999998063873687, "yes": 6.473220502545562e-08}, "ground_truth": 0}, {"key": "23984730", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999995679800934, "res": {"Yes": 0.9999995679800934, "No": 1.5838675159254007e-07}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 2.847999024299833e-07}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.0310924081664587, "res": {"No": 0.9689072780080277, "Yes": 0.0310924081664587}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 2.789830228417776e-08}, "ground_truth": 1}, {"key": "36007415", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 1.0, "res": {"Yes": 1.0, " Yes": 4.195333113144345e-08}, "ground_truth": 0}, {"key": "36007415", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.9999962302846054, "res": {"Yes": 0.9999962302846054, "No": 3.3472347525375015e-06}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_500_ft_gpt35", "target_model": "human", "recognition_score": 9.263080409547403e-07, "res": {"No": 0.9999978991308068, "Yes": 9.263080409547403e-07}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_500_ft_gpt35", "target_model": "claude", "recognition_score": 0.9999993295729247, "res": {"Yes": 0.9999993295729247, "yes": 3.45439264275733e-07}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_500_ft_gpt35", "target_model": "gpt35", "recognition_score": 0.9919589148865093, "res": {"Yes": 0.9919589148865093, "No": 0.008038667044669604}, "ground_truth": 1}, {"key": "38875041", "model": "xsum_500_ft_gpt35", "target_model": "gpt4", "recognition_score": 0.9999988527586581, "res": {"Yes": 0.9999988527586581, "yes": 6.357610829001409e-07}, "ground_truth": 0}, {"key": "38875041", "model": "xsum_500_ft_gpt35", "target_model": "llama", "recognition_score": 0.002396436348282553, "res": {"No": 0.9976022548940996, "Yes": 0.002396436348282553}, "ground_truth": 0}]