{
  "zvYJ1qG1Fy": [
    {
      "inconsistency_parts": [
        {
          "type": "text",
          "page": 5,
          "content": "Given the straightforward definition of Bayesian update function h(\u00b7), its inverse operation is generally easy to derive. The details of such results can be found in Figure 14",
          "line": 236
        },
        {
          "type": "image",
          "page": 22,
          "image_id": "zvYJ1qG1Fy_22_1751270524049",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.518796992481203,
            "width": 0.6906614785992218,
            "height": 0.3804511278195489
          }
        }
      ],
      "review_text": "Lines 236-237: The authors mention that details of the Bayesian update function can be found in Figure 14, but Figure 14 does not provide any details on the Bayesian update function. It is unclear if the authors were referring to Table 2.",
      "category": "figure-text",
      "description": "The text refers to an unrelated figure",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Given the straightforward definition of Bayesian update function h(\u00b7), its inverse operation is generally easy to derive. The details of such results can be found in Figure 14",
          "correct": "zvYJ1qG1Fy_22_1751270524049",
          "incorrect": [
            "zvYJ1qG1Fy_22_image_figure15",
            "zvYJ1qG1Fy_22_image_figure16",
            "zvYJ1qG1Fy_21_image_figure13"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"figure content\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shows inverse operation of Bayesian update\"},\"evidence\":{\"source\":\"Figure 14\",\"statement\":\"shows latent space interpolation\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"task type\",\"claim\":{\"source\":\"text\",\"statement\":\"image reconstruction task\"},\"evidence\":{\"source\":\"Figure 14\",\"statement\":\"classification task\"}}",
            "{\"letter\":\"A\",\"attribute\":\"image quality\",\"claim\":{\"source\":\"caption\",\"statement\":\"VAEs produce blurry images\"},\"evidence\":{\"source\":\"Figure 14\",\"statement\":\"only ParamReL is blurry\"}}",
            "{\"letter\":\"D\",\"attribute\":\"figure number\",\"claim\":{\"source\":\"expectation\",\"statement\":\"figure number is visible\"},\"evidence\":{\"source\":\"Figure 14\",\"statement\":\"figure number is not visible\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"content\",\"target\":\"text_referencing_figure_14\",\"other_involved\":\"figure_14\",\"action\":\"modify\",\"edit_statement\":\"align paper content\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"task type\",\"target\":\"figure_14_description\",\"other_involved\":\"figure_14\",\"action\":\"modify\",\"edit_statement\":\"align task depicted\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"A\",\"attribute\":\"blurry images\",\"target\":\"figure_14_caption\",\"other_involved\":\"figure_14\",\"action\":\"modify\",\"edit_statement\":\"align mentioned models\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"figure number\",\"target\":\"figure_14\",\"other_involved\":\"explanation text\",\"action\":\"add\",\"edit_statement\":\"add figure number\",\"reason\":\"missing\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 14"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "zvYJ1qG1Fy_8_b697afb6",
          "bbox": {
            "x": 0.16339283897763207,
            "y": 0.09256704045438217,
            "width": 0.6785714285714286,
            "height": 0.39540229885057476
          }
        }
      ],
      "review_text": "Figure 4(b): The reviewer states that the learned semantics exhibit progressive, time-varying changes, but the figure's content or caption does not explicitly confirm or elaborate on this statement.",
      "category": "figure-caption",
      "description": "The caption mentions time-varying changes in the images, but the images all look the same",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"progressive changes\",\"claim\":{\"source\":\"caption\",\"statement\":\"progressive changes\"},\"evidence\":{\"source\":\"figure_4b\",\"statement\":\"visually identical\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"FID values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should explain FID\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"omits FID details\"}}",
            "{\"letter\":\"D\",\"attribute\":\"time steps\",\"claim\":{\"source\":\"caption\",\"statement\":\"three time steps\"},\"evidence\":{\"source\":\"figure_4b\",\"statement\":\"four time steps\"}}",
            "{\"letter\":\"A\",\"attribute\":\"representation\",\"claim\":{\"source\":\"caption\",\"statement\":\"time-varying representation\"},\"evidence\":{\"source\":\"figure_4b\",\"statement\":\"static snapshot\"}}"
          ],
          "letters": ["B", "C", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"time-varying changes\",\"target\":\"figure_4b\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"update images\",\"reason\":\"visually identical\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"FID values\",\"target\":\"caption\",\"other_involved\":\"figure_4a\",\"action\":\"add\",\"edit_statement\":\"details explanation\",\"reason\":\"omits details\"}",
            "{\"letter\":\"D\",\"attribute\":\"time steps\",\"target\":\"figure_4b\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align steps number\",\"reason\":\"different number\"}",
            "{\"letter\":\"A\",\"attribute\":\"representation learning\",\"target\":\"figure_4b\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"add continuous depiction\",\"reason\":\"static snapshot\"}"
          ],
          "letters": ["B", "C", "D", "A"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 4"]
    }
  ],
  "zrNbsV87Os": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "zrNbsV87Os_8_fcbbb38a",
          "bbox": {
            "x": 0.16636902945382254,
            "y": 0.08463597790948277,
            "width": 0.6755952380952381,
            "height": 0.2183908045977012
          }
        }
      ],
      "review_text": "Figure 3: The PSNR values for C > 31 are shown, but the CAVE dataset lacks ground truth data for these cases, and the reviewer questions if the model's performance is limited to approximating interpolation results.",
      "category": "figure-only",
      "description": "The CAVE dataset does not have a ground truth for C>31, so it is unknown how the figure can show values for higher C.",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"C values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"C values within dataset limits\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"C values above 31\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"C values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be integers\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"not integers\"}}",
            "{\"letter\":\"C\",\"attribute\":\"models\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be evaluated on full dataset\"},\"evidence\":{\"source\":\"legend\",\"statement\":\"unclear evaluation dataset\"}}",
            "{\"letter\":\"B\",\"attribute\":\"lines\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be explained\"},\"evidence\":{\"source\":\"legend\",\"statement\":\"not explained\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"C values\",\"target\":\"figure_3\",\"other_involved\":\"CAVE dataset\",\"action\":\"modify\",\"edit_statement\":\"align max value\",\"reason\":\"exceeds ground truth\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"C values\",\"target\":\"figure_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"make C integer\",\"reason\":\"C continuous\"}",
            "{\"letter\":\"C\",\"attribute\":\"models\",\"target\":\"figure_3 legend\",\"other_involved\":\"CAVE dataset\",\"action\":\"modify\",\"edit_statement\":\"clarify evaluation scope\",\"reason\":\"unclear\"}",
            "{\"letter\":\"B\",\"attribute\":\"lines\",\"target\":\"figure_3 legend\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"explain meaning\",\"reason\":\"unexplained\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 3"]
    }
  ],
  "zkn2tvtt8J": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "zkn2tvtt8J_2_1751270884677",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.09022556390977444,
            "width": 0.6673151750972763,
            "height": 0.4481203007518797
          }
        }
      ],
      "review_text": "Figure 1b (i): The reconstructed image appears significantly different from the input image. How does the reconstruction network generate a horizontally flipped image?",
      "category": "figure-only",
      "description": "The reconstructed image (VAE output) is horizontally flipped.",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"VAE output\",\"claim\":{\"source\":\"expectation\",\"statement\":\"match input\"},\"evidence\":{\"source\":\"Figure 1 b-i\",\"statement\":\"horizontally flipped\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"decoding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"resemble input\"},\"evidence\":{\"source\":\"a\",\"statement\":\"not resemble input\"}}",
            "{\"letter\":\"A\",\"attribute\":\"anatomical realism\",\"claim\":{\"source\":\"expectation\",\"statement\":\"realistic\"},\"evidence\":{\"source\":\"b-ii\",\"statement\":\"not realistic\"}}",
            "{\"letter\":\"C\",\"attribute\":\"attention maps\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent\"},\"evidence\":{\"source\":\"b-iii\",\"statement\":\"inconsistent\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"reconstruction image\",\"target\":\"figure_1\",\"other_involved\":\"input chest x-ray\",\"action\":\"modify\",\"edit_statement\":\"unflip horizontally\",\"reason\":\"flipped\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"denoised latent\",\"target\":\"figure_1a\",\"other_involved\":\"latent image\",\"action\":\"modify\",\"edit_statement\":\"match visually\",\"reason\":\"not matching\"}",
            "{\"letter\":\"A\",\"attribute\":\"anatomical realism\",\"target\":\"figure_1b-ii\",\"other_involved\":\"input images\",\"action\":\"modify\",\"edit_statement\":\"resolve realism\",\"reason\":\"unrealistic\"}",
            "{\"letter\":\"C\",\"attribute\":\"segmentation attention maps\",\"target\":\"figure_1b-iii\",\"other_involved\":\"merging output\",\"action\":\"modify\",\"edit_statement\":\"align number arrangement\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "zkn2tvtt8J_3_1751270926096",
          "bbox": {
            "x": 0.17607003891050585,
            "y": 0.09022556390977444,
            "width": 0.6575875486381323,
            "height": 0.4631578947368421
          }
        }
      ],
      "review_text": "Figure 2: The generated images, both reconstructed and interpolated, have lower intensity (appear darker) than real images. What is causing this? Are these images generated by DM using reconstruction and interpolation features clinically meaningful?",
      "category": "figure-only",
      "description": "The generated images are generally darker than the original ones, and no explanation is given for this.",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"image brightness\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be explained\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"not explained\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"normalization\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be addressed\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"not addressed\"}}",
            "{\"letter\":\"A\",\"attribute\":\"generation quality\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be similar\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"differs\"}}",
            "{\"letter\":\"B\",\"attribute\":\"semantic variability\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be evident\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"not evident\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"generated image brightness\",\"target\":\"figure_2\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"explain brightness discrepancy\",\"reason\":\"unexplained discrepancy\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"real image brightness\",\"target\":\"figure_2b\",\"other_involved\":\"figure_2b\",\"action\":\"modify\",\"edit_statement\":\"address normalization\",\"reason\":\"unaddressed issue\"}",
            "{\"letter\":\"A\",\"attribute\":\"generated images\",\"target\":\"figure_2a\",\"other_involved\":\"figure_2b\",\"action\":\"modify\",\"edit_statement\":\"blurriness level\",\"reason\":\"expected quality\"}",
            "{\"letter\":\"B\",\"attribute\":\"semantic variability\",\"target\":\"figure_2a\",\"other_involved\":\"DiNOv1-Diffusion, DiNOv2-Diffusion\",\"action\":\"modify\",\"edit_statement\":\"clarify DiNOv2-Diffusion results\",\"reason\":\"inconsistent variability\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "zi3MEZRCqd": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "zi3MEZRCqd_4_1751271198698",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.0962406015037594,
            "width": 0.6634241245136187,
            "height": 0.43007518796992483
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "To accommodate different learning tasks with a unified approach, we designed a flexible grouping\nstrategy that divides tokens into two groups for contrastive learning and mask modeling. Specifically\na input sample to UmiF is image-supervision pair, with their token embeddings denoted as Xi \u2208\nRni\u00d7D and Xs \u2208 Rns\u00d7D , where ni and ns are the number of image tokens and supervision tokens,\nrespectively. Then, we introduce a set of randomly sampled binary bits b = Concat(bi, bs) where\nbi \u2208 {0, 1}ni\nand bs \u2208 {0, 1}ns\n. According to whether the binary bit at each corresponding position\nin b is 0 or 1, we can divide the tokens into two groups. We use X1 = Concat(X1i, X1s) to denote\ntoken embeddings in group 1, which is a concatenation of tokens embeddings from Xi and Xi at\npositions where the corresponding binary bit is 1. Similarly, token embeddings in group 0 are denoted\nas X0 = Concat(X0i, X0s). Therefore, tokens are split into two groups according to b.",
          "line": 215
        }
      ],
      "review_text": "Inconsistencies Between Text and Figures: There are inconsistencies between the text and figures. For instance, the text describes vector groups as Group 1 and Group 0, but the figure labels them as Group 1 and Group 2.",
      "category": "figure-text",
      "description": "The figure shows Group 1 and Group 2, but the text talks about Group 0 and Group 1.",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "To accommodate different learning tasks with a unified approach, we designed a flexible grouping\nstrategy that divides tokens into two groups for contrastive learning and mask modeling. Specifically\na input sample to UmiF is image-supervision pair, with their token embeddings denoted as Xi \u2208\nRni\u00d7D and Xs \u2208 Rns\u00d7D , where ni and ns are the number of image tokens and supervision tokens,\nrespectively. Then, we introduce a set of randomly sampled binary bits b = Concat(bi, bs) where\nbi \u2208 {0, 1}ni\nand bs \u2208 {0, 1}ns\n. According to whether the binary bit at each corresponding position\nin b is 0 or 1, we can divide the tokens into two groups. We use X1 = Concat(X1i, X1s) to denote\ntoken embeddings in group 1, which is a concatenation of tokens embeddings from Xi and Xi at\npositions where the corresponding binary bit is 1. Similarly, token embeddings in group 0 are denoted\nas X0 = Concat(X0i, X0s). Therefore, tokens are split into two groups according to b.",
          "correct": "zi3MEZRCqd_4_1751271198698",
          "incorrect": [
            "zi3MEZRCqd_15_image_figure2",
            "zi3MEZRCqd_16_image_figure3",
            "zi3MEZRCqd_18_image_figure5"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"group names\",\"claim\":{\"source\":\"figure\",\"statement\":\"Group 1 and Group 2\"},\"evidence\":{\"source\":\"text\",\"statement\":\"Group 0 and Group 1\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"number of groups\",\"claim\":{\"source\":\"text\",\"statement\":\"three groups\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"two groups\"}}",
            "{\"letter\":\"D\",\"attribute\":\"group roles\",\"claim\":{\"source\":\"text\",\"statement\":\"specific roles\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"interchangeable groups\"}}",
            "{\"letter\":\"C\",\"attribute\":\"task association\",\"claim\":{\"source\":\"text\",\"statement\":\"both tasks\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"separate tasks\"}}"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"group labels\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update labels\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"token groups\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"add third group\",\"reason\":\"only 2 groups\"}",
            "{\"letter\":\"D\",\"attribute\":\"group usage\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"specify roles\",\"reason\":\"same role\"}",
            "{\"letter\":\"C\",\"attribute\":\"group contribution\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"clarify contribution\",\"reason\":\"different contribution\"}"
          ],
          "letters": ["B", "A", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "z1pydjd4XQ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "z1pydjd4XQ_3_cd53f83e",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.09095781698994254,
            "width": 0.6666666666666666,
            "height": 0.31724137931034485
          }
        }
      ],
      "review_text": "Figure 1 caption is completely wrong",
      "category": "figure-caption",
      "description": "The caption and image do not match",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"task\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent tasks\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"query-document tasks\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"output\",\"claim\":{\"source\":\"expectation\",\"statement\":\"mentions output\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"no output mention\"}}",
            "{\"letter\":\"A\",\"attribute\":\"complexity\",\"claim\":{\"source\":\"caption\",\"statement\":\"complex models\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"simple models\"}}",
            "{\"letter\":\"C\",\"attribute\":\"task\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent tasks\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"inconsistent tasks\"}}"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"methods\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"output formats\",\"target\":\"caption\",\"other_involved\":\"figure_1\",\"action\":\"add\",\"edit_statement\":\"specify\",\"reason\":\"missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"models\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align\",\"reason\":\"more complex models in caption\"}",
            "{\"letter\":\"C\",\"attribute\":\"input types\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align\",\"reason\":\"query vs. passage differ in input\"}"
          ],
          "letters": ["B", "D", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "yx8bU8T5ZN": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "yx8bU8T5ZN_4_1751271646959",
          "bbox": {
            "x": 0.26750972762645914,
            "y": 0.8766917293233083,
            "width": 0.5836575875486382,
            "height": 0.055639097744360905
          }
        },
        {
          "type": "image",
          "page": 4,
          "image_id": "yx8bU8T5ZN_4_1751271700384",
          "bbox": {
            "x": 0.4562256809338522,
            "y": 0.1722055076656485,
            "width": 0.39105058365758755,
            "height": 0.23157894736842105
          }
        }
      ],
      "review_text": "Section 4: The mathematical derivation shows delta L as 0, but the experiment's delta is 1e-5, which contradicts the theoretical result.",
      "category": "figure-figure",
      "description": "The equation shows delta L = 0, but the figure shows delta L != 0",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first figure consistent with the content of the second figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first figure inconsistent with the content of the second figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "yx8bU8T5ZN_4_1751271646959",
          "correct": "yx8bU8T5ZN_4_1751271700384",
          "incorrect": [
            "yx8bU8T5ZN_4_image_figure2",
            "yx8bU8T5ZN_5_image_figure3",
            "yx8bU8T5ZN_7_image_figure4"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"\u0394L value\",\"claim\":{\"source\":\"equation_(6)\",\"statement\":\"zero\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"non-zero\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"\u0394L value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"non-negative\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"negative\"}}",
            "{\"letter\":\"B\",\"attribute\":\"\u0394L value\",\"claim\":{\"source\":\"figure_1\",\"statement\":\"approximation\"},\"evidence\":{\"source\":\"equation_(6)\",\"statement\":\"zero\"}}",
            "{\"letter\":\"D\",\"attribute\":\"'w/o rescale' baseline\",\"claim\":{\"source\":\"expectation\",\"statement\":\"mathematically defined\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"not defined\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"\u0394L values\",\"target\":\"equation_6\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"align approximation definition\",\"reason\":\"contradictory\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"\u0394L values\",\"target\":\"figure_1\",\"other_involved\":\"equation_6\",\"action\":\"modify\",\"edit_statement\":\"align \u0394L sign\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"B\",\"attribute\":\"\u0394L value\",\"target\":\"equation_6\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"align \u0394L precision\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"w/o rescale baseline\",\"target\":\"figure_1\",\"other_involved\":\"equation_6\",\"action\":\"add\",\"edit_statement\":\"mathematical definition\",\"reason\":\"undefined\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["(6)", "Figure 1"]
    }
  ],
  "yPxhj1FKhG": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "yPxhj1FKhG_10_1751271933067",
          "bbox": {
            "x": 0.49124513618677046,
            "y": 0.4962406015037594,
            "width": 0.3463035019455253,
            "height": 0.18195488721804512
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "Increasing N, the number of iterations of the Joint Up/Down Projection dProjIt(\u00b7) in Algorithm 1, consistently leads to improved performance. As N grows, the resulting images better align with the desired conditions and demonstrate higher conditional fidelity and visual quality. ",
          "line": 512
        }
      ],
      "review_text": "Figure 8: The visualizations indicate that further iterations of the method may produce artificial-looking images compared to more natural scenes.",
      "category": "figure-text",
      "description": "Increasing N does not always lead to better results, as too high N introduces artifacts",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Increasing N, the number of iterations of the Joint Up/Down Projection dProjIt(\u00b7) in Algorithm 1, consistently leads to improved performance. As N grows, the resulting images better align with the desired conditions and demonstrate higher conditional fidelity and visual quality. ",
          "correct": "yPxhj1FKhG_10_1751271933067",
          "incorrect": [
            "yPxhj1FKhG_8_image_figure7",
            "yPxhj1FKhG_7_image_figure5",
            "yPxhj1FKhG_7_image_figure6"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"image quality\",\"claim\":{\"source\":\"text\",\"statement\":\"consistently improves\"},\"evidence\":{\"source\":\"figure_8\",\"statement\":\"degrades towards the end\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"N values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be continuous\"},\"evidence\":{\"source\":\"figure_8\",\"statement\":\"discrete values\"}}",
            "{\"letter\":\"D\",\"attribute\":\"visual quality\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should improve\"},\"evidence\":{\"source\":\"figure_8\",\"statement\":\"already high at N=1\"}}",
            "{\"letter\":\"A\",\"attribute\":\"algorithm detail\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be provided\"},\"evidence\":{\"source\":\"text\",\"statement\":\"not provided\"}}"
          ],
          "letters": ["B", "C", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"image quality, conditional fidelity\",\"target\":\"text\",\"other_involved\":\"Figure 8d, N=30\",\"action\":\"modify\",\"edit_statement\":\"reflect performance N=30\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"performance improvement\",\"target\":\"Figure 8\",\"other_involved\":\"text, N values\",\"action\":\"add\",\"edit_statement\":\"more N values\",\"reason\":\"insufficient\"}",
            "{\"letter\":\"D\",\"attribute\":\"visual quality\",\"target\":\"text\",\"other_involved\":\"Figure 8b, N=1\",\"action\":\"modify\",\"edit_statement\":\"reflect N=1 quality\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"Algorithm 1\",\"target\":\"text\",\"other_involved\":\"Figure 8, N\",\"action\":\"add\",\"edit_statement\":\"detail Algorithm 1\",\"reason\":\"missing\"}"
          ],
          "letters": ["B", "C", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 8"]
    }
  ],
  "y8TjnkdWNA": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "y8TjnkdWNA_5_1751272145946",
          "bbox": {
            "x": 0.16439688715953307,
            "y": 0.08721804511278196,
            "width": 0.678988326848249,
            "height": 0.3954887218045113
          }
        }
      ],
      "review_text": "Figure 3 caption: The accuracy of the weak labels displayed in the figure appears to be larger than 70.2%. Or is this the difference between training/test accuracy?",
      "category": "figure-caption",
      "description": "The captions states the performance for weak labels is 70.2, but the dotted black bar in the figure seems to be higher than that",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"weak acc\",\"claim\":{\"source\":\"caption\",\"statement\":\"70.2% accurate\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"higher than 70.2%\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"errorbars\",\"claim\":{\"source\":\"caption\",\"statement\":\"standard deviations\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"not displayed\"}}",
            "{\"letter\":\"A\",\"attribute\":\"weak acc line\",\"claim\":{\"source\":\"expectation\",\"statement\":\"vary with spending\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"horizontal line\"}}",
            "{\"letter\":\"C\",\"attribute\":\"weak acc\",\"claim\":{\"source\":\"caption\",\"statement\":\"less capable than Llama-3-8B\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"higher than Llama-3-8B\"}}"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"weak acc line\",\"target\":\"figure_3\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"match caption value\",\"reason\":\"inconsistency\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"errorbars\",\"target\":\"figure_3\",\"other_involved\":\"caption, weak acc line\",\"action\":\"add\",\"edit_statement\":\"display errorbars\",\"reason\":\"missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"weak acc line\",\"target\":\"figure_3\",\"other_involved\":\"weak spending frac range\",\"action\":\"modify\",\"edit_statement\":\"match expected variation\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"weak acc line\",\"target\":\"figure_3\",\"other_involved\":\"caption, Llama-3-8B performance curves\",\"action\":\"modify\",\"edit_statement\":\"match capability statement\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["B", "D", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "xaafWdM5jI": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "xaafWdM5jI_10_5f40784e",
          "bbox": {
            "x": 0.16636902945382254,
            "y": 0.40957859083153736,
            "width": 0.6666666666666666,
            "height": 0.1724137931034483
          }
        }
      ],
      "review_text": "Figure 6: The reviewer questions the representation of a contour plot for discrete variables and the possibility of the optimal k being a non-integer value, suggesting a mismatch between the figure's content and the expected representation of discrete data.",
      "category": "figure-only",
      "description": "The optimal values for a integer hyperparameter are shown to be non-integers in the plot",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"optimal values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be integer\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"non-integer positions\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"color bar scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"differs between subplots\"}}",
            "{\"letter\":\"A\",\"attribute\":\"hidden size impact\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should have impact\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"little impact\"}}",
            "{\"letter\":\"D\",\"attribute\":\"optimal s value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be in range\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"near boundary\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"optimal k and s values\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align position\",\"reason\":\"non-integer\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"MAE scale\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"unify across\",\"reason\":\"differs\"}",
            "{\"letter\":\"A\",\"attribute\":\"MAE values\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"explain impact\",\"reason\":\"negligible\"}",
            "{\"letter\":\"D\",\"attribute\":\"optimal s value\",\"target\":\"figure_6c\",\"other_involved\":\"figure_6a, figure_6b\",\"action\":\"modify\",\"edit_statement\":\"extend range\",\"reason\":\"boundary\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "wrVZ771SZQ": [
    {
      "inconsistency_parts": [
        {
          "type": "text",
          "page": 1,
          "content": "Comprehensive experiments demonstrate the effectiveness of our method\nin overcoming the limitations of existing historical embedding techniques, high-\nlighting its superior performance and efficiency on large-scale benchmarks, as well\nas significantly accelerated convergence. We will make the code publicly available\nupon acceptance of the work.",
          "line": 26
        },
        {
          "type": "image",
          "page": 8,
          "image_id": "wrVZ771SZQ_8_1751272485350",
          "bbox": {
            "x": 0.4464980544747082,
            "y": 0.22556390977443608,
            "width": 0.39105058365758755,
            "height": 0.26315789473684215
          }
        }
      ],
      "review_text": "The abstract: 'superior performance' vs. Table 1: 'marginal improvement'.",
      "category": "figure-text",
      "description": "The text says superior performance, but the results shown in the table indicate only marginal improvements",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Comprehensive experiments demonstrate the effectiveness of our method\nin overcoming the limitations of existing historical embedding techniques, high-\nlighting its superior performance and efficiency on large-scale benchmarks, as well\nas significantly accelerated convergence. We will make the code publicly available\nupon acceptance of the work.",
          "correct": "wrVZ771SZQ_8_1751272485350",
          "incorrect": [
            "wrVZ771SZQ_8_table_table3",
            "wrVZ771SZQ_7_table_table2",
            "wrVZ771SZQ_9_table_table5"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance gain\",\"claim\":{\"source\":\"text\",\"statement\":\"superior performance\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"marginal accuracy gains\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"convergence speed\",\"claim\":{\"source\":\"text\",\"statement\":\"significantly accelerated convergence\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"no convergence data\"}}",
            "{\"letter\":\"B\",\"attribute\":\"comparison\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should compare to historical embedding techniques\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"compares to broader range of GNNs\"}}",
            "{\"letter\":\"A\",\"attribute\":\"performance\",\"claim\":{\"source\":\"text\",\"statement\":\"superior performance\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"performs worse\"}}"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance claims\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align performance level\",\"reason\":\"marginal gain\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"convergence speed data\",\"target\":\"Table 1\",\"other_involved\":\"text claim\",\"action\":\"add\",\"edit_statement\":\"convergence speed data\",\"reason\":\"missing data\"}",
            "{\"letter\":\"B\",\"attribute\":\"method comparison scope\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align comparison set\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"A\",\"attribute\":\"proposed method performance\",\"target\":\"Table 1\",\"other_involved\":\"text claim\",\"action\":\"modify\",\"edit_statement\":\"align performance claim\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["C", "D", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "wmFp2aMhi0": [
    {
      "inconsistency_parts": [
        {
          "type": "text",
          "page": 18,
          "content": "where \u03b7 is a hyperparameter controlling the strength of the gradient guidance, \u03b3 balances the trade-\noff between fitting the observed data and adhering to the learned data distribution",
          "line": 951
        },
        {
          "type": "image",
          "page": 22,
          "image_id": "wmFp2aMhi0_22_1751272801001",
          "bbox": {
            "x": 0.22470817120622566,
            "y": 0.4270676691729323,
            "width": 0.5797665369649806,
            "height": 0.18345864661654135
          }
        }
      ],
      "review_text": "Table 10: The text mentions eta and gamma hyperparameters, but they are not listed in Table 10.",
      "category": "figure-text",
      "description": "The text uses hyperparameters $\\eta$ and $\\gamma$, but they do not show up in the table summarizing the hyperparameters",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "where \u03b7 is a hyperparameter controlling the strength of the gradient guidance, \u03b3 balances the trade-\noff between fitting the observed data and adhering to the learned data distribution",
          "correct": "wmFp2aMhi0_22_1751272801001",
          "incorrect": [
            "wmFp2aMhi0_20_table_table9",
            "wmFp2aMhi0_19_table_table7",
            "wmFp2aMhi0_19_table_table8"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"eta and gamma hyperparameters\",\"claim\":{\"source\":\"text\",\"statement\":\"present\"},\"evidence\":{\"source\":\"Table 10\",\"statement\":\"missing\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Attention heads\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be explained\"},\"evidence\":{\"source\":\"text\",\"statement\":\"not explained\"}}",
            "{\"letter\":\"C\",\"attribute\":\"gradient guidance and trade-off\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be in Table 10\"},\"evidence\":{\"source\":\"Table 10\",\"statement\":\"not in table\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Alpha\",\"claim\":{\"source\":\"text\",\"statement\":\"should be gamma\"},\"evidence\":{\"source\":\"Table 10\",\"statement\":\"listed as Alpha\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"hyperparameters eta (\u03b7) and gamma (\u03b3)\",\"target\":\"table_10\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"parameters\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"attention heads\",\"target\":\"text\",\"other_involved\":\"table_10\",\"action\":\"add\",\"edit_statement\":\"explain\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"gradient guidance and trade-off\",\"target\":\"table_10\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"values\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"alpha, \u03b1\",\"target\":\"table_10\",\"other_involved\":\"text\",\"action\":\"replace\",\"edit_statement\":\"with gamma, \u03b3\",\"reason\":\"typo\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 10"]
    }
  ],
  "wgKW4U7ktq": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 45,
          "image_id": "wgKW4U7ktq_45_1751272918687",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.09172932330827067,
            "width": 0.6692607003891051,
            "height": 0.837593984962406
          }
        }
      ],
      "review_text": "Fig. 26, Fig. 32, and Fig. 33: The titles indicate that they display cases of standard answers and correct GPT-4o answers, but in some cases, the answers provided by the model are actually incorrect.",
      "category": "figure-caption",
      "description": "The caption says 'GPT-4o's correct response', but the response do not match up with the ground truth and is therefore wrong",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"answers\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match ground truth\"},\"evidence\":{\"source\":\"figure_26\",\"statement\":\"do not match ground truth\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"ground truth\",\"claim\":{\"source\":\"figure_26\",\"statement\":\"is incorrect\"},\"evidence\":{\"source\":\"expectation\",\"statement\":\"should be correct\"}}",
            "{\"letter\":\"A\",\"attribute\":\"response\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be complete\"},\"evidence\":{\"source\":\"figure_26\",\"statement\":\"is partial\"}}",
            "{\"letter\":\"D\",\"attribute\":\"diagrams\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be correct\"},\"evidence\":{\"source\":\"figure_26\",\"statement\":\"contain errors\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"answers\",\"target\":\"GPT-4o\",\"other_involved\":\"figure_26, Ground Truth\",\"action\":\"modify\",\"edit_statement\":\"align answers\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"Ground Truth\",\"target\":\"figure_26\",\"other_involved\":\"Plane Geometry problem\",\"action\":\"modify\",\"edit_statement\":\"correct Ground Truth\",\"reason\":\"incorrect\"}",
            "{\"letter\":\"A\",\"attribute\":\"response\",\"target\":\"figure_26\",\"other_involved\":\"GPT-4o\",\"action\":\"add\",\"edit_statement\":\"full response\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"diagrams\",\"target\":\"figure_26\",\"other_involved\":\"Ground Truth, GPT-4o\",\"action\":\"modify\",\"edit_statement\":\"correct diagrams\",\"reason\":\"errors\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 26"]
    }
  ],
  "w1Pwcx5hPp": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "w1Pwcx5hPp_2_bf20dddd",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.10245206986350575,
            "width": 0.6696428571428571,
            "height": 0.2620689655172414
          }
        }
      ],
      "review_text": "Figure 1: The reviewer questions the improvement shown in the Gaussian representation within the red box regions, as the ellipsoid visualizations appear similar between the baseline and the proposed method. (Reviewer's quote: '...why is it difficult to ascertain the correctness or improvement...')",
      "category": "figure-caption",
      "description": "Caption claims the comparison \"Ours\" shows better performance than the baseline \"GT\", but the highlighted regions look the same",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"representation of scene geometry\",\"claim\":{\"source\":\"caption\",\"statement\":\"superior representation\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"Ours and GT appear identical\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"method performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"Ours is superior\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"Gaussian Splatting SLAM outperforms Ours\"}}",
            "{\"letter\":\"C\",\"attribute\":\"Novel View\",\"claim\":{\"source\":\"caption\",\"statement\":\"visually similar\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"Ours and GT show differences\"}}",
            "{\"letter\":\"B\",\"attribute\":\"quantitative metrics\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should provide metrics\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"does not provide metrics for GT\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"representation\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align superiority claim\",\"reason\":\"identical\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"visual performance\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align superiority claim\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"C\",\"attribute\":\"novel view\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align similarity\",\"reason\":\"distinct\"}",
            "{\"letter\":\"B\",\"attribute\":\"metrics\",\"target\":\"table_1\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"GT metrics\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 1"]
    }
  ],
  "vtUbXd5Cyg": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "vtUbXd5Cyg_9_5352ccf9",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.1593984962406015,
            "width": 0.6653696498054475,
            "height": 0.7398496240601504
          }
        },
        {
          "type": "image",
          "page": 8,
          "image_id": "vtUbXd5Cyg_8_644e57e8",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.0759649061618891,
            "width": 0.6673151750972763,
            "height": 0.14887218045112782
          }
        }
      ],
      "review_text": "Figure 4: The visual results show 3DGS-avatar seems very close to ToMiE than GART, but in Table 1 quantitatively it is overall the inverse of it. Why is this?",
      "category": "figure-table",
      "description": "The table shows worse performance for the 3DGS Avatar, but the figure shows qualitatively similar performance",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "vtUbXd5Cyg_9_5352ccf9",
          "correct": "vtUbXd5Cyg_8_644e57e8",
          "incorrect": [
            "vtUbXd5Cyg_13_table_table3",
            "vtUbXd5Cyg_8_image_figure4",
            "vtUbXd5Cyg_9_image_figure5"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"rendering quality\",\"claim\":{\"source\":\"expectation\",\"statement\":\"worse performance\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"generally worse across multiple metrics\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"PSNR(full)\",\"claim\":{\"source\":\"Table 1\",\"statement\":\"lower than GauHuman and ToMiE(ours)\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"higher visual fidelity\"}}",
            "{\"letter\":\"C\",\"attribute\":\"masked regions\",\"claim\":{\"source\":\"Figure 4\",\"statement\":\"superior rendering\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"average or below-average scores\"}}",
            "{\"letter\":\"B\",\"attribute\":\"LPIPS\",\"claim\":{\"source\":\"Table 1\",\"statement\":\"second best\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"noticeable artifacts\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"rendering quality\",\"target\":\"table_1\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"align ranks\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"PSNR(full)\",\"target\":\"table_1\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"rendering quality\",\"target\":\"figure_4\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align scores\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"LPIPS\",\"target\":\"figure_4\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align visual quality\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4", "Table 1"]
    }
  ],
  "zz9jAssrwL": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "zz9jAssrwL_8_93e94642",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.0962406015037594,
            "width": 0.6712062256809338,
            "height": 0.27669172932330827
          }
        }
      ],
      "review_text": "Table 1: The names 'Ant-v3' and 'Ant' are not consistent.",
      "category": "table-only",
      "description": "There is a model Ant and Ant-v3 in the table that are the same so the naming is not consistent",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"environment name\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"Ant and Ant-v3\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"numerical values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"different values\"}}",
            "{\"letter\":\"A\",\"attribute\":\"Teacher column\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should vary\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"N/A for all\"}}",
            "{\"letter\":\"B\",\"attribute\":\"version number\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"only in Ant-v3\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"environment name\",\"target\":\"table_1\",\"other_involved\":\"return, sparsity\",\"action\":\"modify\",\"edit_statement\":\"align name\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"numerical values\",\"target\":\"table_1\",\"other_involved\":\"ant (medium)\",\"action\":\"modify\",\"edit_statement\":\"align different values\",\"reason\":\"different\"}",
            "{\"letter\":\"A\",\"attribute\":\"return\",\"target\":\"table_1\",\"other_involved\":\"sparsity\",\"action\":\"modify\",\"edit_statement\":\"update empty cells\",\"reason\":\"not applicable\"}",
            "{\"letter\":\"B\",\"attribute\":\"version number\",\"target\":\"table_1\",\"other_involved\":\"return, sparsity\",\"action\":\"add\",\"edit_statement\":\"add missing versions\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "ztT70ubhsc": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 1,
          "image_id": "ztT70ubhsc_1_93190580",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.23458646616541354,
            "width": 0.6653696498054475,
            "height": 0.47969924812030074
          }
        }
      ],
      "review_text": "Figure 1: The binarised HED edges used in this work do not reflect real-world professional sketches, contradicting the paper's claim of dealing with pro-sketch and any other complexity levels with a unified model.",
      "category": "figure-only",
      "description": "The sketch examples for seasoned artists are not actual sketches by artists but manipulated photos",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"sketch authenticity\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be hand-drawn\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"derived from photographs\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"subjects\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be similar\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"differ between categories\"}}",
            "{\"letter\":\"D\",\"attribute\":\"visual complexity\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should differ\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"indistinguishable\"}}",
            "{\"letter\":\"B\",\"attribute\":\"number of examples\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be sufficient\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"insufficient examples\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"sketches\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"replace\",\"edit_statement\":\"with hand-drawn sketches\",\"reason\":\"not original\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"subjects\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"broaden professional sketches\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"complexity\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"distinguish professional sketches\",\"reason\":\"indistinguishable\"}",
            "{\"letter\":\"B\",\"attribute\":\"examples\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"more professional sketches\",\"reason\":\"insufficient\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "ztT70ubhsc_8_90a7ddb1",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.09022556390977444,
            "width": 0.6692607003891051,
            "height": 0.3819548872180451
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "We introduce \u03b3 variable as our Knob parameter. Let the total number of denoising steps be S,\nand \u03b3 represent the step at which fine-grained details cease to influence the denoising process. The\ninference knob influnce the impact of the CGC and FGC modules at inference-time, allowing users\nto adjust \u03b3 depending on their desired level of detail:",
          "line": 375
        }
      ],
      "review_text": "Figure 6: The effect of the knob mechanism is not pronounced, as shown in the volcano and Keith's examples, where changes in details are not noticeable. This contradicts the claim of the knob mechanism's effectiveness and applicability.",
      "category": "figure-text",
      "description": "The knob value does not seem to change the level of detail for most examples",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "We introduce \u03b3 variable as our Knob parameter. Let the total number of denoising steps be S,\nand \u03b3 represent the step at which fine-grained details cease to influence the denoising process. The\ninference knob influnce the impact of the CGC and FGC modules at inference-time, allowing users\nto adjust \u03b3 depending on their desired level of detail:",
          "correct": "ztT70ubhsc_8_90a7ddb1",
          "incorrect": [
            "ztT70ubhsc_9_image_figure7",
            "ztT70ubhsc_5_image_figure5",
            "ztT70ubhsc_3_image_figure4"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"gamma effect\",\"claim\":{\"source\":\"text\",\"statement\":\"adjusts fine-grained details\"},\"evidence\":{\"source\":\"Figure 6\",\"statement\":\"shows minimal change\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"color spectrum\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 6\",\"statement\":\"shows contradiction\"}}",
            "{\"letter\":\"A\",\"attribute\":\"gamma effect\",\"claim\":{\"source\":\"text\",\"statement\":\"affects fine-grained details\"},\"evidence\":{\"source\":\"Figure 6\",\"statement\":\"shows overall image composition\"}}",
            "{\"letter\":\"B\",\"attribute\":\"sketch complexity example\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should show similar change\"},\"evidence\":{\"source\":\"Figure 6\",\"statement\":\"shows different change\"}}"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"gamma effect\",\"target\":\"figure_6\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align detail representation\",\"reason\":\"minimal change\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"color spectrum\",\"target\":\"figure_6\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align color representation\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"A\",\"attribute\":\"gamma effect\",\"target\":\"text\",\"other_involved\":\"figure_6\",\"action\":\"modify\",\"edit_statement\":\"align description\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"sketch complexity\",\"target\":\"figure_6\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align examples\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "C", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "zgM66fu0wv": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "zgM66fu0wv_8_cddf0d04",
          "bbox": {
            "x": 0.17607003891050585,
            "y": 0.4571428571428572,
            "width": 0.6595330739299611,
            "height": 0.1894736842105263
          }
        },
        {
          "type": "image",
          "page": 8,
          "image_id": "zgM66fu0wv_8_03b55df5",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.6428821993949718,
            "width": 0.6653696498054475,
            "height": 0.19849624060150375
          }
        }
      ],
      "review_text": "Tables 3, 4, 5, 6, and 7: The methods reported vary inconsistently. For instance, some tables include IRIS (Llama)-PC + VCR and IRIS-PC+VCR, while others list IRIS (Llama)-GES + VCR and IRIS-GES+VCR, and yet others have IRIS (Llama)-NOTEARS + VCR and IRIS-NOTEARS + VCR. It is scientifically unsound to selectively report results across datasets in this manner.",
      "category": "table-table",
      "description": "The tables should compare the performances on different graphs, but the tables also differ in used methods",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "zgM66fu0wv_8_cddf0d04",
          "correct": "zgM66fu0wv_8_03b55df5",
          "incorrect": [
            "zgM66fu0wv_8_table_table6",
            "zgM66fu0wv_7_table_table3",
            "zgM66fu0wv_7_table_table4"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"methods evaluated\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 4 and Table 5\",\"statement\":\"IRIS configurations differ\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"graph properties\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 4 and Table 5\",\"statement\":\"5 edges, 4 nodes\"}}",
            "{\"letter\":\"A\",\"attribute\":\"NHD Ratio trend\",\"claim\":{\"source\":\"Table 4\",\"statement\":\"decreases across methods\"},\"evidence\":{\"source\":\"Table 5\",\"statement\":\"no decrease\"}}",
            "{\"letter\":\"D\",\"attribute\":\"F1 scores\",\"claim\":{\"source\":\"Table 4\",\"statement\":\"non-zero for IRIS-NOTEARS\"},\"evidence\":{\"source\":\"Table 5\",\"statement\":\"zero for IRIS-NOTEARS\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"methods evaluated\",\"target\":\"table_5\",\"other_involved\":\"table_4\",\"action\":\"modify\",\"edit_statement\":\"align consistency\",\"reason\":\"differs\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"structural properties\",\"target\":\"table_5\",\"other_involved\":\"table_4\",\"action\":\"modify\",\"edit_statement\":\"align consistency\",\"reason\":\"differs\"}",
            "{\"letter\":\"A\",\"attribute\":\"NHD Ratio\",\"target\":\"table_5\",\"other_involved\":\"table_4\",\"action\":\"modify\",\"edit_statement\":\"align consistency\",\"reason\":\"differs\"}",
            "{\"letter\":\"D\",\"attribute\":\"F1 scores\",\"target\":\"table_5\",\"other_involved\":\"table_4\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"differs\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 4", "Table 5"]
    }
  ],
  "zfIxlvKq4u": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "zfIxlvKq4u_10_904ae08a",
          "bbox": {
            "x": 0.541828793774319,
            "y": 0.3593984962406015,
            "width": 0.301556420233463,
            "height": 0.20300751879699247
          }
        }
      ],
      "review_text": "Figure 9: The caption states that the optimal trade-off is achieved by generating 128 tokens with the AR, but the plot does not show a maximum or minimum at this point. This inconsistency needs elaboration in the figure caption.",
      "category": "figure-caption",
      "description": "The caption states best tradeoff at 128 tokens, but plot does not show minimum or maximum there",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"optimal trade-off\",\"claim\":{\"source\":\"caption\",\"statement\":\"at 128 tokens\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"no optimum at 128 tokens\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"optimal trade-off\",\"claim\":{\"source\":\"caption\",\"statement\":\"at 128 tokens\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"FID minimum at 192 tokens\"}}",
            "{\"letter\":\"A\",\"attribute\":\"red dashed line\",\"claim\":{\"source\":\"expectation\",\"statement\":\"connect related data points\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"connects unrelated data points\"}}",
            "{\"letter\":\"D\",\"attribute\":\"optimal trade-off\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shows convergence\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"does not show convergence\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"optimal trade-off\",\"target\":\"caption_figure_9\",\"other_involved\":\"figure_9\",\"action\":\"modify\",\"edit_statement\":\"align token count\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"optimal trade-off\",\"target\":\"caption_figure_9\",\"other_involved\":\"figure_9\",\"action\":\"modify\",\"edit_statement\":\"align FID minimum\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"red dashed line\",\"target\":\"figure_9\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"correct data points\",\"reason\":\"unrelated\"}",
            "{\"letter\":\"D\",\"attribute\":\"optimal trade-off\",\"target\":\"figure_9\",\"other_involved\":\"caption_figure_9\",\"action\":\"add\",\"edit_statement\":\"add convergence plot\",\"reason\":\"missing\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 9"]
    }
  ],
  "zZU69H8tcr": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "zZU69H8tcr_2_4a21bbe4",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.1037593984962406,
            "width": 0.6614785992217899,
            "height": 0.33082706766917297
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "Reward Function Given the above-mentioned state space and action space, the policy only needs\nto execute one step per episode. After pruning the model with the searched strategy, we obtain a\nmodel that meets the total pruning ratio P and subsequently evaluate the pruned model according\nto the task metric. Considering that our experiments are primarily performed on WikiText Merity\net al. (2016) and perplexity is used as the evaluation metric, we define the default reward function\nas R = $10/ppl$ , where ppl is the perplexity evaluated on the WikiText validation. We expect the final\nconvergence value to fall within the range of (1, 2), remaining within the same order of magnitude.\nBased on current LLM benchmarks, we set the coefficient of the reward function to 10",
          "line": 202
        }
      ],
      "review_text": "Figure 1: The reward function is defined as $1/ppl$ in the figure, but the reward function in the text states it as $10/ppl$.",
      "category": "figure-text",
      "description": "The text and the figure show different reward functions",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Reward Function Given the above-mentioned state space and action space, the policy only needs\nto execute one step per episode. After pruning the model with the searched strategy, we obtain a\nmodel that meets the total pruning ratio P and subsequently evaluate the pruned model according\nto the task metric. Considering that our experiments are primarily performed on WikiText Merity\net al. (2016) and perplexity is used as the evaluation metric, we define the default reward function\nas R = $10/ppl$ , where ppl is the perplexity evaluated on the WikiText validation. We expect the final\nconvergence value to fall within the range of (1, 2), remaining within the same order of magnitude.\nBased on current LLM benchmarks, we set the coefficient of the reward function to 10",
          "correct": "zZU69H8tcr_2_4a21bbe4",
          "incorrect": [
            "zZU69H8tcr_2_image_figure2",
            "zZU69H8tcr_6_image_figure3",
            "zZU69H8tcr_7_image_figure5"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"illustration\",\"claim\":{\"source\":\"expectation\",\"statement\":\"figure matches text\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"differs from text\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"coefficient\",\"claim\":{\"source\":\"text\",\"statement\":\"coefficient is 1\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"coefficient is 10\"}}",
            "{\"letter\":\"D\",\"attribute\":\"scissors\",\"claim\":{\"source\":\"expectation\",\"statement\":\"clear representation\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"unclear representation\"}}",
            "{\"letter\":\"C\",\"attribute\":\"coefficient\",\"claim\":{\"source\":\"text\",\"statement\":\"coefficient is 10\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"head count is 10\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"reward function\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align illustration\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"coefficient\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align coefficient\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"scissors\",\"target\":\"figure_1\",\"other_involved\":\"caption, text\",\"action\":\"add\",\"edit_statement\":\"explain representation\",\"reason\":\"unclear\"}",
            "{\"letter\":\"C\",\"attribute\":\"coefficient\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"add reward function\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "zPaTnGjgpa": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "zPaTnGjgpa_5_55c710d0",
          "bbox": {
            "x": 0.17412451361867703,
            "y": 0.0962406015037594,
            "width": 0.6614785992217899,
            "height": 0.20150375939849627
          }
        },
        {
          "type": "text",
          "page": 5,
          "content": "Interestingly, even after the instability is resolved, the similarity among individual eigenvectors fall while the subspace comparison remain largely similar",
          "line": 244
        }
      ],
      "review_text": "Line 244: 'Even after the instability is resolved, the similarity among individual eigenvectors fall while the subspace comparison remain largely similar': This doesn't seem to be true in Figure 2, as after the instability the similarity is close to 1.",
      "category": "figure-text",
      "description": "After the instability is resolved, the eigenvector similarities recover to values close to 1 instead of falling, contrary to the text",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Interestingly, even after the instability is resolved, the similarity among individual eigenvectors fall while the subspace comparison remain largely similar",
          "correct": "zPaTnGjgpa_5_55c710d0",
          "incorrect": [
            "zPaTnGjgpa_5_image_figure3",
            "zPaTnGjgpa_5_image_figure4",
            "zPaTnGjgpa_6_image_figure5"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"individual eigenvectors similarity\",\"claim\":{\"source\":\"text\",\"statement\":\"fall\"},\"evidence\":{\"source\":\"Figure 2 (Top)\",\"statement\":\"recover to high values\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"subspace comparison\",\"claim\":{\"source\":\"text\",\"statement\":\"remains largely similar\"},\"evidence\":{\"source\":\"Figure 2 (Top)\",\"statement\":\"drops massively\"}}",
            "{\"letter\":\"C\",\"attribute\":\"subspace comparison\",\"claim\":{\"source\":\"text\",\"statement\":\"remain largely similar\"},\"evidence\":{\"source\":\"Figure 2 (Bottom)\",\"statement\":\"differ significantly\"}}",
            "{\"letter\":\"B\",\"attribute\":\"individual eigenvectors similarity\",\"claim\":{\"source\":\"text\",\"statement\":\"fall\"},\"evidence\":{\"source\":\"Figure 2 (Bottom)\",\"statement\":\"recover to high values\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"eigenvector similarity\",\"target\":\"text\",\"other_involved\":\"figure_2_top\",\"action\":\"modify\",\"edit_statement\":\"update recovery statement\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"subspace comparison\",\"target\":\"figure_2_top\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update k=3 subspace similarity\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"subspace comparison\",\"target\":\"text\",\"other_involved\":\"figure_2_bottom\",\"action\":\"modify\",\"edit_statement\":\"update similarity claim\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"B\",\"attribute\":\"eigenvector similarity\",\"target\":\"text\",\"other_involved\":\"figure_2_bottom\",\"action\":\"modify\",\"edit_statement\":\"update recovery statement\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "zPaTnGjgpa_7_274c30ee",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.4932330827067669,
            "width": 0.6692607003891051,
            "height": 0.22406015037593985
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "Validation accuracy across learning rates are shown in Figure 5. For both tasks, the mean accuracy\nremains relatively flat until \u03b7 goes past the stability threshold, where it sharply improves. This shift\nhighlights the immediate impact of instabilities, which provide notable generalization benefits, as\ndescribed in earlier sections.",
          "line": 371
        }
      ],
      "review_text": "Figure 5(b): The transition is not sharp, rather it's continuous, contradicting the authors' claim of a clear phase transition.",
      "category": "figure-text",
      "description": "The accuracy does not sharply improve right after crossing the threshold in 5(b)",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Validation accuracy across learning rates are shown in Figure 5. For both tasks, the mean accuracy\nremains relatively flat until \u03b7 goes past the stability threshold, where it sharply improves. This shift\nhighlights the immediate impact of instabilities, which provide notable generalization benefits, as\ndescribed in earlier sections.",
          "correct": "zPaTnGjgpa_7_274c30ee",
          "incorrect": [
            "zPaTnGjgpa_7_image_figure6",
            "zPaTnGjgpa_7_image_figure7",
            "zPaTnGjgpa_8_image_figure8"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"accuracy improvement\",\"claim\":{\"source\":\"text\",\"statement\":\"sharply improves\"},\"evidence\":{\"source\":\"figure_5b\",\"statement\":\"gradual increase\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"accuracy trend\",\"claim\":{\"source\":\"text\",\"statement\":\"remains flat\"},\"evidence\":{\"source\":\"figure_5b\",\"statement\":\"sharp upward trend\"}}",
            "{\"letter\":\"A\",\"attribute\":\"marker meaning\",\"claim\":{\"source\":\"caption\",\"statement\":\"X for below\"},\"evidence\":{\"source\":\"figure_5b\",\"statement\":\"X for above\"}}",
            "{\"letter\":\"D\",\"attribute\":\"stability impact\",\"claim\":{\"source\":\"expectation\",\"statement\":\"immediate impact\"},\"evidence\":{\"source\":\"figure_5a\",\"statement\":\"deteriorates before improving\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"accuracy improvement\",\"target\":\"figure_5b\",\"other_involved\":\"main_text\",\"action\":\"modify\",\"edit_statement\":\"align accuracy depiction\",\"reason\":\"discrepant growth\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"accuracy trend\",\"target\":\"figure_5b\",\"other_involved\":\"main_text\",\"action\":\"modify\",\"edit_statement\":\"align accuracy trend\",\"reason\":\"contradictory description\"}",
            "{\"letter\":\"A\",\"attribute\":\"marker explanation\",\"target\":\"figure_5b\",\"other_involved\":\"figure_caption\",\"action\":\"modify\",\"edit_statement\":\"align marker usage\",\"reason\":\"inconsistent labels\"}",
            "{\"letter\":\"D\",\"attribute\":\"stability impact\",\"target\":\"figure_5a\",\"other_involved\":\"main_text\",\"action\":\"modify\",\"edit_statement\":\"align initial effect\",\"reason\":\"differing outcomes\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 5"]
    }
  ],
  "zLHP6QDWYp": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "zLHP6QDWYp_4_22b61dd5",
          "bbox": {
            "x": 0.1663424124513619,
            "y": 0.09473684210526315,
            "width": 0.6712062256809338,
            "height": 0.3112781954887218
          }
        },
        {
          "type": "image",
          "page": 6,
          "image_id": "zLHP6QDWYp_6_81e472c4",
          "bbox": {
            "x": 0.34922178988326846,
            "y": 0.4248370866130169,
            "width": 0.4844357976653696,
            "height": 0.02706766917293233
          }
        }
      ],
      "review_text": "Two-stage method claim (text) and Eq(8) (figure): The text suggests training labeled and unlabeled samples in different stages, but Eq(8) shows they are optimized together.",
      "category": "figure-equation",
      "description": "The figure shows training of labeled and unlabeled data in two stages, but equation (8) shows them being optimized at the same time",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the equation?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the equation?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "zLHP6QDWYp_4_22b61dd5",
          "correct": "zLHP6QDWYp_6_81e472c4",
          "incorrect": [
            "zLHP6QDWYp_5_interline-equation_equation3",
            "zLHP6QDWYp_5_interline-equation_equation17",
            "zLHP6QDWYp_5_interline-equation_equation9"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"optimization process\",\"claim\":{\"source\":\"figure_1\",\"statement\":\"two-stage process\"},\"evidence\":{\"source\":\"equation_8\",\"statement\":\"simultaneous optimization\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"logit adjustment\",\"claim\":{\"source\":\"figure_1\",\"statement\":\"unlabeled data only\"},\"evidence\":{\"source\":\"equation_8\",\"statement\":\"equal contribution\"}}",
            "{\"letter\":\"A\",\"attribute\":\"regularization term\",\"claim\":{\"source\":\"expectation\",\"statement\":\"in figure\"},\"evidence\":{\"source\":\"equation_8\",\"statement\":\"L_{pair} is term\"}}",
            "{\"letter\":\"B\",\"attribute\":\"loss term\",\"claim\":{\"source\":\"expectation\",\"statement\":\"distinct term\"},\"evidence\":{\"source\":\"equation_8\",\"statement\":\"no distinct term\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"optimization process\",\"target\":\"equation_8\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"reflect two-stage adjustment\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"logit adjustment\",\"target\":\"equation_8\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"reflect unlabeled data only\",\"reason\":\"different\"}",
            "{\"letter\":\"A\",\"attribute\":\"regularization term\",\"target\":\"figure_1\",\"other_involved\":\"equation_8\",\"action\":\"replace\",\"edit_statement\":\"L_{pair} with L_{b_ce}\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"pseudo-labeling process\",\"target\":\"equation_8\",\"other_involved\":\"figure_1\",\"action\":\"add\",\"edit_statement\":\"loss term\",\"reason\":\"missing\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1", "(8)"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "zLHP6QDWYp_4_0b31c6e2",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.0962406015037594,
            "width": 0.6653696498054475,
            "height": 0.3142857142857143
          }
        }
      ],
      "review_text": "Figure 1: The '?' symbol is used to represent unlabeled samples, but the labeled branch also includes a depiction of unlabeled samples, which is inconsistent with the typical understanding of labeled and unlabeled data.",
      "category": "figure-only",
      "description": "The labeled data also contains \"?\" symbolizing unlabeled data",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"labeling\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not have question mark\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"question mark in labeled data\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"data flow\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be clear\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"question mark in unlabeled data\"}}",
            "{\"letter\":\"A\",\"attribute\":\"distribution\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not change\"},\"evidence\":{\"source\":\"bar plot\",\"statement\":\"changes significantly\"}}",
            "{\"letter\":\"D\",\"attribute\":\"color\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"different colors in pseudo-label data\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"question mark\",\"target\":\"Labeled data (Figure 1)\",\"other_involved\":\"Unlabeled data (Figure 1)\",\"action\":\"remove\",\"edit_statement\":\"question mark\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"question mark\",\"target\":\"Unlabeled data (Figure 1)\",\"other_involved\":\"Second-stage Logit Adjustment\",\"action\":\"remove\",\"edit_statement\":\"question mark\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"A\",\"attribute\":\"distributions\",\"target\":\"bar plot\",\"other_involved\":\"Labeled data, Unlabeled data, logit adjustment stages\",\"action\":\"modify\",\"edit_statement\":\"align distributions\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"colors\",\"target\":\"Pseudo-label data\",\"other_involved\":\"Known-class Dataset, Novel-class Dataset\",\"action\":\"modify\",\"edit_statement\":\"align bar colors\",\"reason\":\"different\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "zAogQOIphH": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "zAogQOIphH_4_3fce3a55",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.09473684210526315,
            "width": 0.6712062256809338,
            "height": 0.29172932330827067
          }
        }
      ],
      "review_text": "Figure 2(a): The SMSD Module is labeled both as frozen and trainable, which can lead to confusion.",
      "category": "figure-only",
      "description": "The SMSD Module seems to have trainable and frozen weights at the same time",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"icons\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't be both\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"both icons shown\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"BERT component\",\"claim\":{\"source\":\"expectation\",\"statement\":\"frozen, no train path\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"train path to BERT\"}}",
            "{\"letter\":\"B\",\"attribute\":\"decoder output\",\"claim\":{\"source\":\"expectation\",\"statement\":\"is input to generator\"},\"evidence\":{\"source\":\"figure_2c\",\"statement\":\"decoder output to generator\"}}",
            "{\"letter\":\"D\",\"attribute\":\"SMSD Module\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent contribution\"},\"evidence\":{\"source\":\"figure_2ab\",\"statement\":\"different contributions\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"icons\",\"target\":\"SMSD_Module\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"remove one\",\"reason\":\"simultaneous\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"BERT_component\",\"target\":\"SMSD_Module\",\"other_involved\":\"train_path\",\"action\":\"remove\",\"edit_statement\":\"remove train path\",\"reason\":\"frozen\"}",
            "{\"letter\":\"B\",\"attribute\":\"Decoder_output\",\"target\":\"figure_2c\",\"other_involved\":\"Generator\",\"action\":\"add\",\"edit_statement\":\"connect to Generator\",\"reason\":\"input\"}",
            "{\"letter\":\"D\",\"attribute\":\"SMSD_Module\",\"target\":\"figure_2a\",\"other_involved\":\"figure_2b\",\"action\":\"modify\",\"edit_statement\":\"change contribution\",\"reason\":\"different\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "ytn0rbIfOx": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "ytn0rbIfOx_7_cd74b19b",
          "bbox": {
            "x": 0.17412451361867703,
            "y": 0.0962406015037594,
            "width": 0.6595330739299611,
            "height": 0.23458646616541354
          }
        }
      ],
      "review_text": "Table 1: LLM-F shows the best performance rather than ToT on task 8, which means the bolding is incorrect. This suggests that ToT did not achieve SOTA performance across all tasks.",
      "category": "table-only",
      "description": "The best model on task 8 is LLM-F, not TOT. Yet TOT is bolded",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"highlighted value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"match best value\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"wrong value highlighted\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"highlighted best values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"one best value\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"multiple best values\"}}",
            "{\"letter\":\"B\",\"attribute\":\"variants\",\"claim\":{\"source\":\"caption\",\"statement\":\"F and V variants\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"LLM missing V variant\"}}",
            "{\"letter\":\"C\",\"attribute\":\"bolded method\",\"claim\":{\"source\":\"data\",\"statement\":\"RS-V is best\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"TOT bolded\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"highlighted value\",\"target\":\"table_1\",\"other_involved\":\"Task 8\",\"action\":\"modify\",\"edit_statement\":\"correct value\",\"reason\":\"wrong value\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"highlighted value\",\"target\":\"table_1\",\"other_involved\":\"Task 2\",\"action\":\"add\",\"edit_statement\":\"missing highlighting\",\"reason\":\"two best values\"}",
            "{\"letter\":\"B\",\"attribute\":\"variants\",\"target\":\"caption\",\"other_involved\":\"LLM\",\"action\":\"modify\",\"edit_statement\":\"align variant count\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"C\",\"attribute\":\"bolding\",\"target\":\"table_1\",\"other_involved\":\"Task 8, TOT, RS-V\",\"action\":\"modify\",\"edit_statement\":\"bold correct method\",\"reason\":\"wrong method bolded\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "yJduhi9mDQ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "yJduhi9mDQ_9_84000b03",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.07819548872180451,
            "width": 0.6634241245136187,
            "height": 0.4150375939849624
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "H\u00f6lder Pruning effectively integrates these feature extractors without compromising model performance while also achieving robust defense against backdoor attacks.",
          "line": 108
        }
      ],
      "review_text": "Table 1: H\u00f6lder Pruning is claimed to act without compromising model performance (lines 108-109), but the results in Tables 1 and 2 show a degradation of the natural performance.",
      "category": "table-text",
      "description": "The accuracy using H\u00f6lder Pruning is worse than some of the other SOTA methods, but caption says is consistently outperforms SOTA",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "H\u00f6lder Pruning effectively integrates these feature extractors without compromising model performance while also achieving robust defense against backdoor attacks.",
          "correct": "yJduhi9mDQ_9_84000b03",
          "incorrect": [
            "yJduhi9mDQ_9_table_table4",
            "yJduhi9mDQ_21_table_table7",
            "yJduhi9mDQ_21_table_table6"
          ],
          "letters": ["B", "D", "C", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"average classification accuracy\",\"claim\":{\"source\":\"caption\",\"statement\":\"outperforms SOTA\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"lower or comparable to SOTA\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"robust defense\",\"claim\":{\"source\":\"text\",\"statement\":\"achieves robust defense\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"not consistently highest RA\"}}",
            "{\"letter\":\"C\",\"attribute\":\"Attack Success Rate\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"occasionally higher ASR\"}}",
            "{\"letter\":\"D\",\"attribute\":\"classification accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be superior\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"worse for GTSRB\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"accuracy\",\"target\":\"table_2\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align SOTA\",\"reason\":\"lower\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"robust accuracy\",\"target\":\"table_2\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align highest\",\"reason\":\"not highest\"}",
            "{\"letter\":\"C\",\"attribute\":\"attack success rate\",\"target\":\"table_2\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align outperformance\",\"reason\":\"higher\"}",
            "{\"letter\":\"D\",\"attribute\":\"accuracy\",\"target\":\"table_2\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align outperformance\",\"reason\":\"worse\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "yJAk0n0NyU": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "yJAk0n0NyU_9_ba5e5705",
          "bbox": {
            "x": 0.17607003891050585,
            "y": 0.744360902255639,
            "width": 0.6517509727626459,
            "height": 0.15037593984962405
          }
        }
      ],
      "review_text": "Table 4: The improvement of BlockDance-Ada over BlockDance seems limited, contradicting the claim that the adaptive reuse approach is a highlight of the paper.",
      "category": "figure-only",
      "description": "The claim of BlockDance-Ada being a better trade-off is not clear comparing it to BlockDance",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"trade-off\",\"claim\":{\"source\":\"caption\",\"statement\":\"better trade-off\"},\"evidence\":{\"source\":\"table_4\",\"statement\":\"ambiguous trade-off\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"latency\",\"claim\":{\"source\":\"expectation\",\"statement\":\"lowest latency\"},\"evidence\":{\"source\":\"table_4\",\"statement\":\"not lowest latency\"}}",
            "{\"letter\":\"B\",\"attribute\":\"quality metrics\",\"claim\":{\"source\":\"expectation\",\"statement\":\"better or equal quality\"},\"evidence\":{\"source\":\"table_4\",\"statement\":\"inferior quality\"}}",
            "{\"letter\":\"D\",\"attribute\":\"latency improvement\",\"claim\":{\"source\":\"expectation\",\"statement\":\"related to absolute latency\"},\"evidence\":{\"source\":\"table_4\",\"statement\":\"unrelated to absolute latency\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance\",\"target\":\"table_4\",\"other_involved\":\"BlockDance-Ada, BlockDance (N=2), BlockDance (N=3)\",\"action\":\"modify\",\"edit_statement\":\"clarify trade-off\",\"reason\":\"ambiguous trade-off\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"latency\",\"target\":\"table_4\",\"other_involved\":\"BlockDance-Ada\",\"action\":\"modify\",\"edit_statement\":\"align claim\",\"reason\":\"contradicts claim\"}",
            "{\"letter\":\"B\",\"attribute\":\"quality metrics\",\"target\":\"table_4\",\"other_involved\":\"BlockDance-Ada\",\"action\":\"modify\",\"edit_statement\":\"align quality\",\"reason\":\"inferior quality\"}",
            "{\"letter\":\"D\",\"attribute\":\"latency improvement percentages\",\"target\":\"table_4\",\"other_involved\":\"BlockDance (N=2), BlockDance (N=3)\",\"action\":\"modify\",\"edit_statement\":\"reconcile percentages\",\"reason\":\"unrelated values\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 4"]
    }
  ],
  "yEnJvc7ogD": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "yEnJvc7ogD_10_1042f8d4",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.4616541353383458,
            "width": 0.7101167315175096,
            "height": 0.16090225563909774
          }
        }
      ],
      "review_text": "Figure 5: This figure is a table, but it is not presented individually, which is confusing and goes against the standard format for presenting tables.",
      "category": "figure-only",
      "description": "Figure is not a figure, but a table",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"figure type\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be graphical\"},\"evidence\":{\"source\":\"Figure 5\",\"statement\":\"presented as tables\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"parts\",\"claim\":{\"source\":\"caption\",\"statement\":\"left and right\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"not distinguished\"}}",
            "{\"letter\":\"C\",\"attribute\":\"terms\",\"claim\":{\"source\":\"caption\",\"statement\":\"label shift and label noise\"},\"evidence\":{\"source\":\"tables\",\"statement\":\"presented differently\"}}",
            "{\"letter\":\"B\",\"attribute\":\"values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"tables\",\"statement\":\"swapped\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"content type\",\"target\":\"figure_5\",\"other_involved\":null,\"action\":\"replace\",\"edit_statement\":\"convert\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"figure parts\",\"target\":\"figure_5\",\"other_involved\":\"figure_5_caption\",\"action\":\"add\",\"edit_statement\":\"distinguish figure parts\",\"reason\":\"not present\"}",
            "{\"letter\":\"C\",\"attribute\":\"method names\",\"target\":\"figure_5_caption\",\"other_involved\":\"figure_5_caption\",\"action\":\"modify\",\"edit_statement\":\"swap\",\"reason\":\"swapped\"}",
            "{\"letter\":\"B\",\"attribute\":\"values\",\"target\":\"table_right\",\"other_involved\":\"table_left\",\"action\":\"replace\",\"edit_statement\":\"swap\",\"reason\":\"swapped\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 5"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 25,
          "image_id": "yEnJvc7ogD_25_8cf6dc4f",
          "bbox": {
            "x": 0.25,
            "y": 0.5052631578947369,
            "width": 0.5038910505836576,
            "height": 0.17744360902255638
          }
        },
        {
          "type": "image",
          "page": 10,
          "image_id": "yEnJvc7ogD_10_b0aed8d0",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.4594235527784305,
            "width": 0.708171206225681,
            "height": 0.16090225563909774
          }
        }
      ],
      "review_text": "Figure 5 table ANLI with label noise results for G-mean do not match results in Figure 13 in the appendix. In Figure 13, Clean baseline always outperforms CWPLUGIN for all Validation Set sizes where as in Figure 5, Clean has lower mean of 0.528 as compared to 0.541 of CWPLUGIN.",
      "category": "figure-table",
      "description": "The G-mean in the table shows CWPLUGIN outperforming Clean, but Clean outperforms CWPLUGIN in the plot",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "yEnJvc7ogD_25_8cf6dc4f",
          "correct": "yEnJvc7ogD_10_b0aed8d0",
          "incorrect": [
            "yEnJvc7ogD_9_image_figure4",
            "yEnJvc7ogD_8_image_figure3",
            "yEnJvc7ogD_7_image_figure2"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"G-mean\",\"claim\":{\"source\":\"Figure 5 (Right)\",\"statement\":\"higher for CWPLUGIN\"},\"evidence\":{\"source\":\"Figure 13\",\"statement\":\"Clean is higher\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"F-measure\",\"claim\":{\"source\":\"Figure 5 (Left)\",\"statement\":\"higher for CWPLUGIN\"},\"evidence\":{\"source\":\"Figure 13\",\"statement\":\"not confirmed\"}}",
            "{\"letter\":\"A\",\"attribute\":\"G-mean values\",\"claim\":{\"source\":\"caption\",\"statement\":\"performs favorably\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"narrow range\"}}",
            "{\"letter\":\"B\",\"attribute\":\"standard deviations\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be visualized\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"not visualized\"}}"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"G-mean\",\"target\":\"figure_13\",\"other_involved\":\"figure_5_right\",\"action\":\"modify\",\"edit_statement\":\"CWPLUGIN G-mean bar position\",\"reason\":\"conflicting\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"F-measure\",\"target\":\"figure_13\",\"other_involved\":\"figure_5_left\",\"action\":\"modify\",\"edit_statement\":\"align CWPLUGIN F-measure\",\"reason\":\"not aligned\"}",
            "{\"letter\":\"A\",\"attribute\":\"G-mean values\",\"target\":\"figure_13\",\"other_involved\":\"figure_5_right\",\"action\":\"add\",\"edit_statement\":\"data points for CWPLUGIN\",\"reason\":\"not enough data points\"}",
            "{\"letter\":\"B\",\"attribute\":\"standard deviations\",\"target\":\"figure_13\",\"other_involved\":\"figure_5_right\",\"action\":\"add\",\"edit_statement\":\"show G-mean standard deviations\",\"reason\":\"not shown\"}"
          ],
          "letters": ["D", "C", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 13", "Figure 5"]
    }
  ],
  "yDICgRUj5s": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "yDICgRUj5s_10_4b747cbe",
          "bbox": {
            "x": 0.49513618677042803,
            "y": 0.32631578947368417,
            "width": 0.3385214007782101,
            "height": 0.2646616541353384
          }
        }
      ],
      "review_text": "Figure 6: The legend seems to have a bug.",
      "category": "figure-only",
      "description": "The pattern of the bars is not consistent",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"match pattern\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"does not match\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"pattern\",\"claim\":{\"source\":\"expectation\",\"statement\":\"dotted pattern\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"solid color\"}}",
            "{\"letter\":\"C\",\"attribute\":\"pattern\",\"claim\":{\"source\":\"expectation\",\"statement\":\"stripes\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"dots\"}}",
            "{\"letter\":\"D\",\"attribute\":\"color\",\"claim\":{\"source\":\"expectation\",\"statement\":\"green\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"orange\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"patterns\",\"target\":\"figure_6\",\"other_involved\":\"legend\",\"action\":\"modify\",\"edit_statement\":\"match legend\",\"reason\":\"mismatched\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"bars\",\"target\":\"figure_6\",\"other_involved\":\"legend\",\"action\":\"modify\",\"edit_statement\":\"match pattern color\",\"reason\":\"mismatched\"}",
            "{\"letter\":\"C\",\"attribute\":\"pattern\",\"target\":\"figure_6\",\"other_involved\":\"legend\",\"action\":\"modify\",\"edit_statement\":\"dots to stripes\",\"reason\":\"stripes instead of dots\"}",
            "{\"letter\":\"D\",\"attribute\":\"color\",\"target\":\"figure_6\",\"other_involved\":\"legend\",\"action\":\"modify\",\"edit_statement\":\"orange to green\",\"reason\":\"green instead of orange\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "y15LAM4u0A": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "y15LAM4u0A_3_c26f331e",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.10075187969924813,
            "width": 0.7354085603112841,
            "height": 0.1849624060150376
          }
        },
        {
          "type": "image",
          "page": 5,
          "image_id": "y15LAM4u0A_5_ca385fdd",
          "bbox": {
            "x": 0.2538910505836576,
            "y": 0.09701753774083648,
            "width": 0.4941634241245136,
            "height": 0.4330827067669173
          }
        }
      ],
      "review_text": "Table 1: The authors assert that the scene is crafted from real city maps, but the quality of the assets and rendered images does not seem realistic enough to justify this claim.",
      "category": "figure-table",
      "description": "The table claims a real environment, but the figure shows images of an artificial world",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "y15LAM4u0A_3_c26f331e",
          "correct": "y15LAM4u0A_5_ca385fdd",
          "incorrect": [
            "y15LAM4u0A_6_image_figure6",
            "y15LAM4u0A_6_image_figure5",
            "y15LAM4u0A_4_image_figure4"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"environment\",\"claim\":{\"source\":\"table_1\",\"statement\":\"Real\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"synthetic\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"agent type\",\"claim\":{\"source\":\"table_1\",\"statement\":\"All\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"only aerial view\"}}",
            "{\"letter\":\"C\",\"attribute\":\"agent type\",\"claim\":{\"source\":\"table_1\",\"statement\":\"All\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"only drone view\"}}",
            "{\"letter\":\"D\",\"attribute\":\"unreal engine\",\"claim\":{\"source\":\"expectation\",\"statement\":\"look like UE\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"does not look like UE\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"environment type\",\"target\":\"table_1\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"update environment description\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"agent types\",\"target\":\"table_1\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"update supported agents\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"agent types\",\"target\":\"table_1\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"update supported agents\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"engine\",\"target\":\"table_1\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"update engine information\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1", "Figure 3"]
    }
  ],
  "xcPN6Or88c": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "xcPN6Or88c_7_60cb3da9",
          "bbox": {
            "x": 0.1780155642023346,
            "y": 0.09924812030075188,
            "width": 0.6556420233463035,
            "height": 0.43609022556390975
          }
        }
      ],
      "review_text": "Table 2: The last column showing imputation results with the mean/median baseline is identical, which is not expected and needs clarification.",
      "category": "figure-only",
      "description": "The last column of the table has the same value across different mask rates",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"MSE and MAE values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should vary\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"identical\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"MSE trend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should increase\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"decreases\"}}",
            "{\"letter\":\"D\",\"attribute\":\"best and second best\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not be swapped\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"swapped\"}}",
            "{\"letter\":\"B\",\"attribute\":\"notation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"underline best\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"bold best\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"MSE and MAE values\",\"target\":\"table_2\",\"other_involved\":\"'Mean/Median Transformer' column\",\"action\":\"modify\",\"edit_statement\":\"correct values\",\"reason\":\"identical\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"MSE values\",\"target\":\"table_2\",\"other_involved\":\"'Average' row, 'ImputeINR'\",\"action\":\"modify\",\"edit_statement\":\"correct trend\",\"reason\":\"incorrect trend\"}",
            "{\"letter\":\"D\",\"attribute\":\"best and second best values\",\"target\":\"table_2\",\"other_involved\":\"'ETT' row\",\"action\":\"swap\",\"edit_statement\":\"swap values\",\"reason\":\"swapped\"}",
            "{\"letter\":\"B\",\"attribute\":\"display convention\",\"target\":\"table_2_caption\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"update convention\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "xcPN6Or88c_7_98117fef",
          "bbox": {
            "x": 0.17412451361867703,
            "y": 0.09473684210526315,
            "width": 0.6634241245136187,
            "height": 0.443609022556391
          }
        }
      ],
      "review_text": "Table 2: Mask rate is represented inconsistently, e.g., 10% or 0.1.",
      "category": "figure-only",
      "description": "The mask rate is sometimes reported as percentage, sometimes as decimals",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"Mask Rate format\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent format\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"percentages and decimals\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"Mask Rate values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be 10, 30, 50, 70, 90\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"includes 20 and 40\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Mask Rate values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should include all values\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"omits some values\"}}",
            "{\"letter\":\"B\",\"attribute\":\"formatting rules\",\"claim\":{\"source\":\"expectation\",\"statement\":\"best in bold\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"incorrectly applied\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"mask rate values\",\"target\":\"table_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align value format\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"mask rate values\",\"target\":\"table_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align values shown\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"mask rate values\",\"target\":\"table_2\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"include missing values\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"B\",\"attribute\":\"formatting rules\",\"target\":\"table_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align formatting\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "xawA8X5dHq": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "xawA8X5dHq_8_9308d25b",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.09924812030075188,
            "width": 0.6770428015564202,
            "height": 0.5804511278195489
          }
        }
      ],
      "review_text": "Figure 2: The x-axes are not on the same scale, which makes the comparison between models less informative.",
      "category": "figure-only",
      "description": "The two plots have different x-axis scaling, making comparison difficult",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"x-axis max value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"different\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"y-axis range\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"different\"}}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis spacing\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be uniform\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"not uniform\"}}",
            "{\"letter\":\"C\",\"attribute\":\"x-axis label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be present\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"missing\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"x-axis maximum\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"x-axis maximum\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"y-axis range\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"y-axis range\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis intervals\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"y-axis intervals\",\"reason\":\"not uniform\"}",
            "{\"letter\":\"C\",\"attribute\":\"x-axis labels\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"x-axis labels\",\"reason\":\"missing\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "xaXvHdH9Y4": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "xaXvHdH9Y4_4_e2b6e1d8",
          "bbox": {
            "x": 0.16439688715953307,
            "y": 0.21804511278195488,
            "width": 0.3404669260700389,
            "height": 0.4466165413533835
          }
        },
        {
          "type": "image",
          "page": 4,
          "image_id": "xaXvHdH9Y4_4_14c0f217",
          "bbox": {
            "x": 0.5029182879377432,
            "y": 0.22333332721452068,
            "width": 0.33463035019455256,
            "height": 0.4616541353383458
          }
        }
      ],
      "review_text": "Fig 2: For layer 2, S768 is still selected for pruning, contradicting the explanation in Fig 1 where a hidden state already marked for pruning in the previous layer should not be pruned again in the current layer.",
      "category": "figure-figure",
      "description": "The flow diagram in Figure 1 shows that a state already selected for pruning should not be pruned again, but this happens to S768 in Figure 2",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first figure consistent with the content of the second figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first figure inconsistent with the content of the second figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "xaXvHdH9Y4_4_e2b6e1d8",
          "correct": "xaXvHdH9Y4_4_14c0f217",
          "incorrect": [
            "xaXvHdH9Y4_4_image_figure3",
            "xaXvHdH9Y4_5_image_figure6",
            "xaXvHdH9Y4_5_image_figure5"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"hidden state removal\",\"claim\":{\"source\":\"figure_1\",\"statement\":\"not marked again\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"marked again\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"hidden states pruned\",\"claim\":{\"source\":\"figure_1\",\"statement\":\"one state\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"multiple states\"}}",
            "{\"letter\":\"D\",\"attribute\":\"last hidden state pruning\",\"claim\":{\"source\":\"figure_1\",\"statement\":\"not pruned\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"pruned\"}}",
            "{\"letter\":\"B\",\"attribute\":\"pruning layer\",\"claim\":{\"source\":\"caption\",\"statement\":\"Layer 2\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"Layer 1\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"hidden state\",\"target\":\"figure_2\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"align S768 pruning\",\"reason\":\"marked twice\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"pruned states\",\"target\":\"figure_2\",\"other_involved\":\"figure_1\",\"action\":\"add\",\"edit_statement\":\"add S1, S3, S767\",\"reason\":\"more than one\"}",
            "{\"letter\":\"D\",\"attribute\":\"last hidden state\",\"target\":\"figure_2\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"align S768 pruning\",\"reason\":\"should not prune\"}",
            "{\"letter\":\"B\",\"attribute\":\"pruning layer\",\"target\":\"figure_2\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align S768 layer\",\"reason\":\"contradicts caption\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 1", "Figure 2"]
    }
  ],
  "wq4AeBWQJ4": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "wq4AeBWQJ4_8_13607eb6",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.09774436090225565,
            "width": 0.6731517509727626,
            "height": 0.20150375939849627
          }
        }
      ],
      "review_text": "Figure 7: The caption text mentions labels (a), (b), and (c) but they are missing from the figure.",
      "category": "figure-caption",
      "description": "The caption mentions sub-plots (a), (b), (c), but the sub-plots are not labelled like that",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"sub-plot labels\",\"claim\":{\"source\":\"caption\",\"statement\":\"labeled\"},\"evidence\":{\"source\":\"figure_7\",\"statement\":\"not labeled\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"result plots\",\"claim\":{\"source\":\"expectation\",\"statement\":\"two plots\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"three plots\"}}",
            "{\"letter\":\"A\",\"attribute\":\"y-axis scaling\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"sub-plots\",\"statement\":\"different\"}}",
            "{\"letter\":\"D\",\"attribute\":\"batch size\",\"claim\":{\"source\":\"caption\",\"statement\":\"claims batch size\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"differs from claimed\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"labels\",\"target\":\"figure_7\",\"other_involved\":\"figure_7_caption\",\"action\":\"add\",\"edit_statement\":\"sub-plot labels\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"result plots\",\"target\":\"figure_7\",\"other_involved\":\"figure_7_caption\",\"action\":\"remove\",\"edit_statement\":\"one plot\",\"reason\":\"extra\"}",
            "{\"letter\":\"A\",\"attribute\":\"y-axis scaling\",\"target\":\"figure_7\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align scaling\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"batch size\",\"target\":\"figure_7\",\"other_involved\":\"figure_7_caption\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"different\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 7"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "wq4AeBWQJ4_8_51b24b89",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.2947368421052632,
            "width": 0.6653696498054475,
            "height": 0.2
          }
        }
      ],
      "review_text": "Figure 7: The caption text states that all multipliers were trained using 8-bits, but the plot on the right shows results for 64-bit training.",
      "category": "figure-caption",
      "description": "The caption states training at 8-bit, yet the plot legends show different bit values",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"bit-range\",\"claim\":{\"source\":\"caption\",\"statement\":\"8-bits\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"higher bit-ranges\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't occlude data\"},\"evidence\":{\"source\":\"figure_8\",\"statement\":\"occludes data\"}}",
            "{\"letter\":\"C\",\"attribute\":\"batch sizes\",\"claim\":{\"source\":\"caption\",\"statement\":\"differ from titles\"},\"evidence\":{\"source\":\"sub-plot titles\",\"statement\":\"differ from caption\"}}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis alignment\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be aligned\"},\"evidence\":{\"source\":\"figure_8\",\"statement\":\"not aligned\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"multipliers\",\"target\":\"figure_8_caption\",\"other_involved\":\"figure_8_legend\",\"action\":\"modify\",\"edit_statement\":\"update bit-range\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"legends\",\"target\":\"figure_8\",\"other_involved\":\"data points\",\"action\":\"reposition\",\"edit_statement\":\"move legends\",\"reason\":\"occlusion\"}",
            "{\"letter\":\"C\",\"attribute\":\"batch sizes\",\"target\":\"figure_8_caption\",\"other_involved\":\"figure_8a_title, figure_8b_title\",\"action\":\"modify\",\"edit_statement\":\"update sizes\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis values\",\"target\":\"figure_8a\",\"other_involved\":\"figure_8b\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"misaligned\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 8"]
    }
  ],
  "wixDdL0vj8": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "wixDdL0vj8_8_29978d8b",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.09924812030075188,
            "width": 0.6673151750972763,
            "height": 0.17443609022556392
          }
        },
        {
          "type": "image",
          "page": 8,
          "image_id": "wixDdL0vj8_8_afd9c8c7",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.26994986856790415,
            "width": 0.6692607003891051,
            "height": 0.14285714285714285
          }
        }
      ],
      "review_text": "Table 3: The CF-100 BYOL baseline results are shown as 51.7 \u00b1 0.3, which does not match the BYOL results in Table 2 that show 51.7 \u00b1 0.1.",
      "category": "table-table",
      "description": "Both tables show a different result for BYOL CF-100",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "wixDdL0vj8_8_29978d8b",
          "correct": "wixDdL0vj8_8_afd9c8c7",
          "incorrect": [
            "wixDdL0vj8_7_table_table2",
            "wixDdL0vj8_7_table_table4",
            "wixDdL0vj8_7_table_table5"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"BYOL standard deviation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 2 and Table 3\",\"statement\":\"inconsistent for CF-100\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"BYOL average accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 2 and Table 3\",\"statement\":\"inconsistent for CF-100\"}}",
            "{\"letter\":\"B\",\"attribute\":\"T-IN standard deviation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 2 and Table 3\",\"statement\":\"inconsistent for CF-100\"}}",
            "{\"letter\":\"A\",\"attribute\":\"BYOL accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 2 and Table 3\",\"statement\":\"inconsistent for CF-10\"}}"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"standard deviation\",\"target\":\"table_2\",\"other_involved\":\"table_3\",\"action\":\"modify\",\"edit_statement\":\"BYOL standard deviation\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"average accuracy\",\"target\":\"table_2\",\"other_involved\":\"table_3\",\"action\":\"modify\",\"edit_statement\":\"BYOL average accuracy\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"standard deviation\",\"target\":\"table_2\",\"other_involved\":\"table_3\",\"action\":\"modify\",\"edit_statement\":\"T-IN standard deviation\",\"reason\":\"different\"}",
            "{\"letter\":\"A\",\"attribute\":\"BYOL accuracy\",\"target\":\"table_2\",\"other_involved\":\"table_3\",\"action\":\"modify\",\"edit_statement\":\"BYOL accuracy\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["C", "D", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2", "Table 3"]
    }
  ],
  "wYZ8rxwvMm": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "wYZ8rxwvMm_9_586b4bce",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.09323308270676692,
            "width": 0.6692607003891051,
            "height": 0.18195488721804512
          }
        },
        {
          "type": "image",
          "page": 9,
          "image_id": "wYZ8rxwvMm_9_ee7bf1ca",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.47596490616188913,
            "width": 0.6673151750972763,
            "height": 0.19849624060150375
          }
        }
      ],
      "review_text": "Figure 2: The caption states 'The reinforcement learning complexity is less in a setting if the simulated performance is high.', but the rightmost subfigure shows simulated performance going down, which contradicts this statement.",
      "category": "figure-figure",
      "description": "Both figures show the same plot with different captions",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first figure consistent with the content of the second figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first figure inconsistent with the content of the second figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "wYZ8rxwvMm_9_586b4bce",
          "correct": "wYZ8rxwvMm_9_ee7bf1ca",
          "incorrect": [
            "wYZ8rxwvMm_8_image_figure2",
            "wYZ8rxwvMm_7_image_figure1",
            "wYZ8rxwvMm_7_table_table1"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"plots\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be different\"},\"evidence\":{\"source\":\"figure_2 and figure_3\",\"statement\":\"are identical\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"plots\",\"claim\":{\"source\":\"caption\",\"statement\":\"are different\"},\"evidence\":{\"source\":\"figure_2 and figure_3\",\"statement\":\"are same\"}}",
            "{\"letter\":\"D\",\"attribute\":\"y-axis ranges\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"figure_2 and figure_3\",\"statement\":\"are different\"}}",
            "{\"letter\":\"B\",\"attribute\":\"titles\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not be swapped\"},\"evidence\":{\"source\":\"figure_2 and figure_3\",\"statement\":\"titles not swapped\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"plots\",\"target\":\"figure_2\",\"other_involved\":\"figure_3, caption_figure_2, caption_figure_3\",\"action\":\"modify\",\"edit_statement\":\"update labels\",\"reason\":\"identical\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"plots\",\"target\":\"figure_2\",\"other_involved\":\"figure_3, caption_figure_2, caption_figure_3\",\"action\":\"modify\",\"edit_statement\":\"align plot descriptions\",\"reason\":\"dissimilar\"}",
            "{\"letter\":\"D\",\"attribute\":\"y-axis range\",\"target\":\"figure_2\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"align ranges\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"titles\",\"target\":\"figure_2\",\"other_involved\":\"figure_3\",\"action\":\"swap\",\"edit_statement\":\"subplot titles\",\"reason\":\"incorrect plot\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2", "Figure 3"]
    }
  ],
  "wWPiAjbR7a": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "wWPiAjbR7a_7_708c1e45",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.11278195488721804,
            "width": 0.6614785992217899,
            "height": 0.22255639097744362
          }
        }
      ],
      "review_text": "Table 2: The results for GPT-3.5-turbo across different settings on the Dreaddit and IRF test sets are identical, suggesting a possible error or inconsistency.",
      "category": "table-only",
      "description": "The GPT-3.5-turbo results are the same for all settings for Dreaddit and IRF",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"accuracy results\",\"claim\":{\"source\":\"expectation\",\"statement\":\"vary across configurations\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"remain unchanged\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"dreaddit column\",\"claim\":{\"source\":\"expectation\",\"statement\":\"varies across configurations\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"identical for all\"}}",
            "{\"letter\":\"B\",\"attribute\":\"Irf column\",\"claim\":{\"source\":\"expectation\",\"statement\":\"varies across configurations\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"identical for all\"}}",
            "{\"letter\":\"D\",\"attribute\":\"AVG column\",\"claim\":{\"source\":\"expectation\",\"statement\":\"ours is best\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"not always best\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"dreaddit and Irf columns\",\"target\":\"table_2\",\"other_involved\":\"GPT-3.5-turbo rows\",\"action\":\"modify\",\"edit_statement\":\"update values\",\"reason\":\"identical values\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"dreaddit column\",\"target\":\"table_2\",\"other_involved\":\"GPT-3.5-turbo rows\",\"action\":\"modify\",\"edit_statement\":\"update values\",\"reason\":\"identical values\"}",
            "{\"letter\":\"B\",\"attribute\":\"Irf column\",\"target\":\"table_2\",\"other_involved\":\"GPT-3.5-turbo rows\",\"action\":\"modify\",\"edit_statement\":\"update values\",\"reason\":\"identical values\"}",
            "{\"letter\":\"D\",\"attribute\":\"AVG column 'Ours' performance\",\"target\":\"table_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"explain variance\",\"reason\":\"not always best\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "wJ6Bx1IYrQ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "wJ6Bx1IYrQ_5_3aa2361f",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.09774436090225565,
            "width": 0.6653696498054475,
            "height": 0.31278195488721805
          }
        },
        {
          "type": "text",
          "page": 5,
          "content": "As illustrated in Figure 3 (right), for each sample yj , a learnable special token c \u2208 RC is broadcast across all Ej electrodes and appended to the end of the temporal sequence.",
          "line": 252
        }
      ],
      "review_text": "Inconsistency between Figure 3 and line 252. I do not see any graphical representation of the learnable special token $c$ in Figure 3.",
      "category": "figure-text",
      "description": "The text points towards Figure 3 for a depiction of the learnable special token c, but it is not present in the plot",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "As illustrated in Figure 3 (right), for each sample yj , a learnable special token c \u2208 RC is broadcast across all Ej electrodes and appended to the end of the temporal sequence.",
          "correct": "wJ6Bx1IYrQ_5_3aa2361f",
          "incorrect": [
            "wJ6Bx1IYrQ_7_image_figure4",
            "wJ6Bx1IYrQ_2_image_figure2",
            "wJ6Bx1IYrQ_8_image_figure5"
          ],
          "letters": ["D", "C", "B", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"token depiction\",\"claim\":{\"source\":\"text\",\"statement\":\"token appended at end\"},\"evidence\":{\"source\":\"Figure 3 (right)\",\"statement\":\"token not depicted\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"token usage\",\"claim\":{\"source\":\"Figure 3 (right)\",\"statement\":\"used for pre-training\"},\"evidence\":{\"source\":\"text\",\"statement\":\"used for finetuning\"}}",
            "{\"letter\":\"C\",\"attribute\":\"token position\",\"claim\":{\"source\":\"text\",\"statement\":\"appended at end\"},\"evidence\":{\"source\":\"Figure 3 (right)\",\"statement\":\"added at start\"}}",
            "{\"letter\":\"D\",\"attribute\":\"figure reference\",\"claim\":{\"source\":\"text\",\"statement\":\"illustrated in Figure 3 (right)\"},\"evidence\":{\"source\":\"Figure 3 (left)\",\"statement\":\"token shown\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"token c\",\"target\":\"figure_3_right\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"token c\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"token c\",\"target\":\"figure_3_right\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"use for finetuning\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"token c position\",\"target\":\"figure_3_right\",\"other_involved\":\"text\",\"action\":\"reposition\",\"edit_statement\":\"end of sequence\",\"reason\":\"misplaced\"}",
            "{\"letter\":\"D\",\"attribute\":\"token c location\",\"target\":\"figure_3_right\",\"other_involved\":\"figure_3_left\",\"action\":\"reposition\",\"edit_statement\":\"to figure 3 right\",\"reason\":\"misplaced\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 3"]
    }
  ],
  "w0MAu8vjwj": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 14,
          "image_id": "w0MAu8vjwj_14_dc6bf716",
          "bbox": {
            "x": 0.17607003891050585,
            "y": 0.13383458646616542,
            "width": 0.6536964980544747,
            "height": 0.26917293233082706
          }
        }
      ],
      "review_text": "Figure 7: The tag is used inconsistently: the prefix <helpfulness> becomes <helpful> in the output of the reward model.",
      "category": "figure-only",
      "description": "The prefix <helpfulness> turns into <helpful> in the output of the reward model",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"helpfulness tag\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not change form\"},\"evidence\":{\"source\":\"figure_7\",\"statement\":\"changes form\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"color coding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"figure_7\",\"statement\":\"does not match\"}}",
            "{\"letter\":\"C\",\"attribute\":\"honesty tag\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be found\"},\"evidence\":{\"source\":\"figure_7\",\"statement\":\"not found\"}}",
            "{\"letter\":\"B\",\"attribute\":\"tag\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be specified\"},\"evidence\":{\"source\":\"figure_7\",\"statement\":\"not specified\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"tag\",\"target\":\"reward model output\",\"other_involved\":\"input prefix\",\"action\":\"modify\",\"edit_statement\":\"change tag\",\"reason\":\"form change\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"color coding\",\"target\":\"input prefix\",\"other_involved\":\"reward model output\",\"action\":\"modify\",\"edit_statement\":\"match color coding\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"C\",\"attribute\":\"honesty tag\",\"target\":\"reward model output\",\"other_involved\":\"input prefix\",\"action\":\"add\",\"edit_statement\":\"add tag\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"tags\",\"target\":\"reward model output\",\"other_involved\":\"input prefix\",\"action\":\"add\",\"edit_statement\":\"add tag\",\"reason\":\"unspecified\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 7"]
    }
  ],
  "vikwIayXOx": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "vikwIayXOx_7_54200bc2",
          "bbox": {
            "x": 0.17412451361867703,
            "y": 0.6571428571428573,
            "width": 0.6575875486381323,
            "height": 0.21804511278195488
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "To assess robustness at high resolutions we employ PPA (Struppek et al., 2022) against attacks targeting 224\u00d7224 pixels and MIRROR (An\net al., 2022) against attacks targeting 116\u00d7116 pixels. For low resolution 64\u00d764 pixels, we leverage\nfour SOTA white-box attacks: GMI (Zhang et al., 2020), KedMI (Chen et al., 2021), PLG-MI (Yuan\net al., 2023), and LOMMA (Nguyen et al., 2023) (including LOMMA+GMI and LOMMA+KedMI).\nAdditionally, we incorporate BREPMI (Kahla et al., 2022) for label-only attacks",
          "line": 377
        }
      ],
      "review_text": "The resolution of Mirror is inconsistent throughout the text. Could you clarify whether it is 116*116 or 160*160?",
      "category": "table-text",
      "description": "MIRROR is shown to use attacks of shape 116x116 in the text but 160x160 in the table",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "To assess robustness at high resolutions we employ PPA (Struppek et al., 2022) against attacks targeting 224\u00d7224 pixels and MIRROR (An\net al., 2022) against attacks targeting 116\u00d7116 pixels. For low resolution 64\u00d764 pixels, we leverage\nfour SOTA white-box attacks: GMI (Zhang et al., 2020), KedMI (Chen et al., 2021), PLG-MI (Yuan\net al., 2023), and LOMMA (Nguyen et al., 2023) (including LOMMA+GMI and LOMMA+KedMI).\nAdditionally, we incorporate BREPMI (Kahla et al., 2022) for label-only attacks",
          "correct": "vikwIayXOx_7_54200bc2",
          "incorrect": [
            "vikwIayXOx_9_table_table4",
            "vikwIayXOx_9_table_table3",
            "vikwIayXOx_4_table_table1"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"resolution\",\"claim\":{\"source\":\"text\",\"statement\":\"116x116\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"different resolution\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"resolution\",\"claim\":{\"source\":\"text\",\"statement\":\"high resolution\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"lower resolution\"}}",
            "{\"letter\":\"B\",\"attribute\":\"resolution\",\"claim\":{\"source\":\"text\",\"statement\":\"GMI and KedMI\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"GMI, KedMI, LOMMA, and PLGMI\"}}",
            "{\"letter\":\"D\",\"attribute\":\"resolution\",\"claim\":{\"source\":\"expectation\",\"statement\":\"224x224\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"160x160\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"resolution\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align resolution\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"resolution\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"show high resolution\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"B\",\"attribute\":\"attack list\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align attack list\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"resolution\",\"target\":\"table_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"change to 224x224\",\"reason\":\"unexpected value\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "vXSCD3ToCS": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "vXSCD3ToCS_5_a1e8a4c6",
          "bbox": {
            "x": 0.49902723735408555,
            "y": 0.25263157894736843,
            "width": 0.3463035019455253,
            "height": 0.3067669172932331
          }
        },
        {
          "type": "text",
          "page": 5,
          "content": "The results demonstrate that the adjacency matrix generated by our algorithm perfectly matches the actual road network structure.",
          "line": 262
        }
      ],
      "review_text": "Figure 2: The bottom left corner shows a road segment between two points that is not represented as an edge in the topology, which appears inconsistent with the actual road network.",
      "category": "figure-text",
      "description": "Edges are missing between some nodes, so the topology does not perfectly match the road network structure as claimed in the text of the paper",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "The results demonstrate that the adjacency matrix generated by our algorithm perfectly matches the actual road network structure.",
          "correct": "vXSCD3ToCS_5_a1e8a4c6",
          "incorrect": [
            "vXSCD3ToCS_5_image_figure3",
            "vXSCD3ToCS_5_image_figure4",
            "vXSCD3ToCS_6_image_figure5"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"edges\",\"claim\":{\"source\":\"text\",\"statement\":\"perfectly matches\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"missing edges\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"edges\",\"claim\":{\"source\":\"text\",\"statement\":\"perfectly matches\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"extraneous edges\"}}",
            "{\"letter\":\"D\",\"attribute\":\"network\",\"claim\":{\"source\":\"expectation\",\"statement\":\"complete structure\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"disconnected portion\"}}",
            "{\"letter\":\"B\",\"attribute\":\"nodes\",\"claim\":{\"source\":\"expectation\",\"statement\":\"evenly distributed\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"unevenly distributed\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"edges\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"missing edges\",\"reason\":\"contradicts claim\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"edges\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"remove\",\"edit_statement\":\"extraneous edges\",\"reason\":\"contradicts claim\"}",
            "{\"letter\":\"D\",\"attribute\":\"road network\",\"target\":\"figure_2\",\"other_involved\":\"algorithm\",\"action\":\"modify\",\"edit_statement\":\"disconnected portion\",\"reason\":\"incomplete structure\"}",
            "{\"letter\":\"B\",\"attribute\":\"blue nodes\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"distribute nodes evenly\",\"reason\":\"unclear paths\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 2"]
    }
  ],
  "vVVtTVIR5O": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "vVVtTVIR5O_7_e354e621",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.0962406015037594,
            "width": 0.6634241245136187,
            "height": 0.3037593984962406
          }
        }
      ],
      "review_text": "Table 1: The heading should be 'methods with image data', not 'methods with text data'.",
      "category": "table-only",
      "description": "The table sub-section headings show \"methods without image data\" twice, while there should be one \"methods without image data\" and one \"methods with image data\"",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"sub-section heading\",\"claim\":{\"source\":\"text\",\"statement\":\"first block requires image data\"},\"evidence\":{\"source\":\"table\",\"statement\":\"first block is methods without image data\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"caption\",\"claim\":{\"source\":\"caption\",\"statement\":\"all methods require image data\"},\"evidence\":{\"source\":\"table\",\"statement\":\"contains methods without image data\"}}",
            "{\"letter\":\"A\",\"attribute\":\"sub-section headings\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be ordered correctly\"},\"evidence\":{\"source\":\"table\",\"statement\":\"are not ordered correctly\"}}",
            "{\"letter\":\"D\",\"attribute\":\"caption\",\"claim\":{\"source\":\"caption\",\"statement\":\"bold indicates best performing\"},\"evidence\":{\"source\":\"table\",\"statement\":\"bold not always best performing\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"sub-section heading\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align with text\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"caption claim\",\"target\":\"caption\",\"other_involved\":\"table_1 headings\",\"action\":\"modify\",\"edit_statement\":\"method requirements\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"A\",\"attribute\":\"sub-section headings\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"reposition\",\"edit_statement\":\"swap\",\"reason\":\"wrong order\"}",
            "{\"letter\":\"D\",\"attribute\":\"bolded numbers\",\"target\":\"table_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"highlight best\",\"reason\":\"best not always highlighted\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "v8GuB74YRA": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "v8GuB74YRA_2_b2e3f23a",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.10075187969924813,
            "width": 0.6673151750972763,
            "height": 0.29774436090225564
          }
        }
      ],
      "review_text": "Figure 1: The MAE-B16/SimMIM-B16 models have different GT radar plots between (a) and (b), which contradicts the expectation that the plots should be consistent.",
      "category": "figure-only",
      "description": "The MAE-B16/SimMIM-B16 models have different GT radar plots between (a) and (b), even though they should be the same",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"radar plots\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"different\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"model presence\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be comparable\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"not comparable\"}}",
            "{\"letter\":\"A\",\"attribute\":\"color-coding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"D\",\"attribute\":\"plot identity\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should differ\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"identical\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"radar plots\",\"target\":\"figure_1b\",\"other_involved\":\"figure_1a\",\"action\":\"modify\",\"edit_statement\":\"show MAE-B16, SimMIM-B16 same\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"plots\",\"target\":\"figure_1a\",\"other_involved\":\"figure_1b\",\"action\":\"add\",\"edit_statement\":\"add MAE-B16, SimMIM-B16 to compare\",\"reason\":\"missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"color-coding\",\"target\":\"figure_1a\",\"other_involved\":\"figure_1b\",\"action\":\"modify\",\"edit_statement\":\"match model\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"plots\",\"target\":\"figure_1b\",\"other_involved\":\"figure_1a\",\"action\":\"modify\",\"edit_statement\":\"update plots\",\"reason\":\"identical\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "v5bK7cQch3": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "v5bK7cQch3_8_7d9ad9a1",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.11127819548872181,
            "width": 0.6731517509727626,
            "height": 0.2887218045112782
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "Using group-level reference networks, the CINP model with the network prompting protocol held the best MCC performance (29.33%) on the ADHD dataset.",
          "line": 374
        }
      ],
      "review_text": "Table 3: The number 29.33% seems inconsistent with the figures in the table.",
      "category": "table-text",
      "description": "The MCC performance of 29.33% on ADHD can't be found in the Table",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Using group-level reference networks, the CINP model with the network prompting protocol held the best MCC performance (29.33%) on the ADHD dataset.",
          "correct": "v5bK7cQch3_8_7d9ad9a1",
          "incorrect": [
            "v5bK7cQch3_7_table_table4",
            "v5bK7cQch3_6_table_table2",
            "v5bK7cQch3_4_table_table1"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"MCC performance\",\"claim\":{\"source\":\"text\",\"statement\":\"29.33%\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"not found\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"dataset\",\"claim\":{\"source\":\"text\",\"statement\":\"ADHD\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"ABIDE\"}}",
            "{\"letter\":\"C\",\"attribute\":\"MCC performance rank\",\"claim\":{\"source\":\"text\",\"statement\":\"best\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"second-best\"}}",
            "{\"letter\":\"D\",\"attribute\":\"confidence interval\",\"claim\":{\"source\":\"caption\",\"statement\":\"95%\"},\"evidence\":{\"source\":\"text\",\"statement\":\"best performance\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"MCC performance\",\"target\":\"Table_3\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"missing value\",\"reason\":\"not found\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"dataset\",\"target\":\"Table_3\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"missing dataset\",\"reason\":\"not present\"}",
            "{\"letter\":\"C\",\"attribute\":\"MCC performance\",\"target\":\"text\",\"other_involved\":\"Table_3\",\"action\":\"modify\",\"edit_statement\":\"ranking value\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"D\",\"attribute\":\"value meaning\",\"target\":\"text\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"interpret value\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3"]
    }
  ],
  "v44CUwEeDY": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "v44CUwEeDY_10_cc718971",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.25263157894736843,
            "width": 0.6770428015564202,
            "height": 0.29172932330827067
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "In this paper, we propose a new sketch-based algorithm, PGNN, employing the Proper orthogonal decomposition (POD) method to craft update rules to train GNNs, improving the memory requirement and training time without the complication of updating the sketches during training. Experiments on standard graph datasets show that PGNN can reach much lower sketch ratios without compromising the performance. We prove the optimality of the POD update rule for the linearized GNN (SGC). Empirical findings validate our approach, demonstrating superior performance at reduced sketch ratios and adaptability across various GNN architectures.",
          "line": 22
        }
      ],
      "review_text": "Table 6: The method does not perform well when compared against nonlinear baselines, contradicting the overall positive presentation of the method in the paper.",
      "category": "table-text",
      "description": "Abstract claims superior performance, but performance comparison table does not reflect this",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "In this paper, we propose a new sketch-based algorithm, PGNN, employing the Proper orthogonal decomposition (POD) method to craft update rules to train GNNs, improving the memory requirement and training time without the complication of updating the sketches during training. Experiments on standard graph datasets show that PGNN can reach much lower sketch ratios without compromising the performance. We prove the optimality of the POD update rule for the linearized GNN (SGC). Empirical findings validate our approach, demonstrating superior performance at reduced sketch ratios and adaptability across various GNN architectures.",
          "correct": "v44CUwEeDY_10_cc718971",
          "incorrect": [
            "v44CUwEeDY_9_table_table4",
            "v44CUwEeDY_8_table_table3",
            "v44CUwEeDY_8_table_table2"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"superior performance\"},\"evidence\":{\"source\":\"Table 6\",\"statement\":\"lower performance metrics\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"sketch ratio\",\"claim\":{\"source\":\"abstract\",\"statement\":\"reduced sketch ratios\"},\"evidence\":{\"source\":\"Table 6\",\"statement\":\"higher sketch ratios\"}}",
            "{\"letter\":\"C\",\"attribute\":\"performance\",\"claim\":{\"source\":\"abstract\",\"statement\":\"superior performance\"},\"evidence\":{\"source\":\"Table 6\",\"statement\":\"higher sketch ratio\"}}",
            "{\"letter\":\"B\",\"attribute\":\"optimality\",\"claim\":{\"source\":\"abstract\",\"statement\":\"optimal for SGC\"},\"evidence\":{\"source\":\"Table 6\",\"statement\":\"SGC lowest performance\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"performance metrics\",\"target\":\"abstract\",\"other_involved\":\"table_6\",\"action\":\"modify\",\"edit_statement\":\"align superiority claim\",\"reason\":\"contradictory\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"sketch ratios\",\"target\":\"abstract\",\"other_involved\":\"table_6\",\"action\":\"modify\",\"edit_statement\":\"align superiority claim\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"C\",\"attribute\":\"performance, sketch ratios\",\"target\":\"abstract\",\"other_involved\":\"table_6\",\"action\":\"modify\",\"edit_statement\":\"align superiority claim\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"B\",\"attribute\":\"optimality claim\",\"target\":\"abstract\",\"other_involved\":\"table_6\",\"action\":\"modify\",\"edit_statement\":\"align SGC optimality\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 6"]
    }
  ],
  "v27yHgKtMv": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "v27yHgKtMv_8_ae78d5b8",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.09774436090225565,
            "width": 0.6673151750972763,
            "height": 0.33984962406015035
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "The gradient of LCE with respect to the logit zn,k is: \u2202LCE \u2202zn,k = \u02c6yn,k \u2212 yn,k. This formulation forces the model to focus sharply on the true label, resulting in overconfident predictions that ignore relationships between adjacent classes.",
          "line": 147
        }
      ],
      "review_text": "2. The paper claims that CE loss leads to overconfident predictions, yet the reliability diagrams presented indicate underconfident outcomes in the experiments, seemingly contradicting this claim.",
      "category": "figure-text",
      "description": "The figure shows underconfidence for Cross Entropy, while the text claims it results in overconfidence",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "The gradient of LCE with respect to the logit zn,k is: \u2202LCE \u2202zn,k = \u02c6yn,k \u2212 yn,k. This formulation forces the model to focus sharply on the true label, resulting in overconfident predictions that ignore relationships between adjacent classes.",
          "correct": "v27yHgKtMv_8_ae78d5b8",
          "incorrect": [
            "v27yHgKtMv_1_image_figure1",
            "v27yHgKtMv_8_table_table4",
            "v27yHgKtMv_7_table_table3"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"confidence\",\"claim\":{\"source\":\"text\",\"statement\":\"overconfident predictions\"},\"evidence\":{\"source\":\"Figure 2(a)\",\"statement\":\"underconfidence\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"calibration\",\"claim\":{\"source\":\"Figure 2(a)\",\"statement\":\"accuracy above expected line\"},\"evidence\":{\"source\":\"text\",\"statement\":\"overconfident predictions\"}}",
            "{\"letter\":\"D\",\"attribute\":\"calibration\",\"claim\":{\"source\":\"expectation\",\"statement\":\"low ECE is poor calibration\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"notably low ECE\"}}",
            "{\"letter\":\"B\",\"attribute\":\"confidence\",\"claim\":{\"source\":\"expectation\",\"statement\":\"indicates confidence\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"only ECE values\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"predictions\",\"target\":\"text\",\"other_involved\":\"figure_2a\",\"action\":\"modify\",\"edit_statement\":\"update prediction confidence\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"predictions\",\"target\":\"text\",\"other_involved\":\"figure_2a\",\"action\":\"modify\",\"edit_statement\":\"update prediction confidence\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"ECE values\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align calibration implication\",\"reason\":\"discrepancy\"}",
            "{\"letter\":\"B\",\"attribute\":\"ECE values\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"confidence indication\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "uBxN9JA29p": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "uBxN9JA29p_2_01ff60a4",
          "bbox": {
            "x": 0.16245136186770429,
            "y": 0.10526315789473684,
            "width": 0.6984435797665369,
            "height": 0.19849624060150375
          }
        },
        {
          "type": "text",
          "page": 2,
          "content": "In sum, current 3D human pose estimators face three primary challenges: 1) a scarcity of high-quality 3D human pose datasets, 2) high-reliance on two-stage models, and 3) time-intensive many-to-one processing approaches.",
          "line": 68
        }
      ],
      "review_text": "Table 1: The authors state that a primary challenge of 3D HPE is the high reliance on two-stage methods, but the table shows that one-stage methods are not worse per se.",
      "category": "figure-text",
      "description": "There seem to be many one-stage models available according to the figure, but the text mentions this as a scarcity",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "In sum, current 3D human pose estimators face three primary challenges: 1) a scarcity of high-quality 3D human pose datasets, 2) high-reliance on two-stage models, and 3) time-intensive many-to-one processing approaches.",
          "correct": "uBxN9JA29p_2_01ff60a4",
          "incorrect": [
            "uBxN9JA29p_8_table_table2",
            "uBxN9JA29p_4_image_figure1",
            "uBxN9JA29p_4_interline-equation_equation30.5"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"model stages\",\"claim\":{\"source\":\"text\",\"statement\":\"high-reliance on two-stage models\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"multiple one-stage models\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"dataset quality\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should have high-quality datasets\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"lack of data augment checkmarks\"}}",
            "{\"letter\":\"B\",\"attribute\":\"input type\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should focus on video inputs\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"majority use video input\"}}",
            "{\"letter\":\"C\",\"attribute\":\"processing approach\",\"claim\":{\"source\":\"text\",\"statement\":\"time-intensive many-to-one\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"many models are many-to-many\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"models\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"update model count\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"datasets\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"no-action\",\"edit_statement\":\"consistent\",\"reason\":\"consistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"input type\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align input type\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"processing approaches\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align processing type\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "tsfR7JCwTf": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "tsfR7JCwTf_5_753fcd43",
          "bbox": {
            "x": 0.1663424124513619,
            "y": 0.3037593984962406,
            "width": 0.3326848249027238,
            "height": 0.21503759398496242
          }
        },
        {
          "type": "image",
          "page": 5,
          "image_id": "tsfR7JCwTf_5_61caa2e9",
          "bbox": {
            "x": 0.5087548638132295,
            "y": 0.30002505653782896,
            "width": 0.32490272373540857,
            "height": 0.21954887218045113
          }
        }
      ],
      "review_text": "Figure 2: Why does not have the results for \u03c3=0.1, 0.5?",
      "category": "figure-figure",
      "description": "The first figure shows results for all sigma values, the second figure misses results for sigma=0.1 and sigma=0.5",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first figure consistent with the content of the second figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first figure inconsistent with the content of the second figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "tsfR7JCwTf_5_753fcd43",
          "correct": "tsfR7JCwTf_5_61caa2e9",
          "incorrect": [
            "tsfR7JCwTf_4_image_figure1",
            "tsfR7JCwTf_5_table_table1",
            "tsfR7JCwTf_6_table_table2"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"plot\",\"claim\":{\"source\":\"caption\",\"statement\":\"include all \u03c3\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"missing \u03c3 values\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"\u03c3 values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"caption and legend differ\"}}",
            "{\"letter\":\"A\",\"attribute\":\"x-axis\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 1 and Figure 2\",\"statement\":\"different maximum values\"}}",
            "{\"letter\":\"B\",\"attribute\":\"results\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be included\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"omits results\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"curves for \\nlsigma\",\"target\":\"figure_2_caption\",\"other_involved\":\"figure_2_plot, \\nfigure_2_legend\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"\\nlsigma values\",\"target\":\"figure_1\",\"other_involved\":\"figure_1_caption, \\nfigure_1_legend\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"different\"}",
            "{\"letter\":\"A\",\"attribute\":\"Radius_x-axis\",\"target\":\"figure_1\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"align maximum values\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"results for \\nlsigma = 1.0 and \\nlsigma = 0.25\",\"target\":\"figure_2\",\"other_involved\":\"figure_2_caption\",\"action\":\"add\",\"edit_statement\":\"add results\",\"reason\":\"omitted\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1", "Figure 2"]
    }
  ],
  "tpUEqmjZiS": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "tpUEqmjZiS_7_64caaf4c",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.09924812030075188,
            "width": 0.6692607003891051,
            "height": 0.21353383458646616
          }
        }
      ],
      "review_text": "Figure 3: The wrong images are placed for 'Grasp the croissant'.",
      "category": "figure-only",
      "description": "The images for grasp the croissant do not show a croissant but a banana",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"object\",\"claim\":{\"source\":\"expectation\",\"statement\":\"grasping a croissant\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"grasping a banana\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"object\",\"claim\":{\"source\":\"expectation\",\"statement\":\"grasping a banana\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"grasping a block\"}}",
            "{\"letter\":\"A\",\"attribute\":\"object\",\"claim\":{\"source\":\"expectation\",\"statement\":\"grasping a drink\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"grasping a block\"}}",
            "{\"letter\":\"D\",\"attribute\":\"steps\",\"claim\":{\"source\":\"expectation\",\"statement\":\"fewer or equal steps\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"more steps\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"object grasped\",\"target\":\"Figure 3 robot\",\"other_involved\":\"Lifelong New Skill 2: Grasp the croissant\",\"action\":\"modify\",\"edit_statement\":\"show croissant\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"object grasped\",\"target\":\"Figure 3 robot\",\"other_involved\":\"Pre-Trained Skill 1: Grasp the banana\",\"action\":\"modify\",\"edit_statement\":\"show banana\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"object grasped\",\"target\":\"Figure 3 robot\",\"other_involved\":\"Lifelong New Skill 3: Place the drink on cutting board\",\"action\":\"modify\",\"edit_statement\":\"show drink\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"number of steps\",\"target\":\"description\",\"other_involved\":\"Pre-Trained Skill 1: Grasp the banana, Lifelong New Skill 3: Push down the teapot handle\",\"action\":\"modify\",\"edit_statement\":\"align step count\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "thqPibDg6A": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "thqPibDg6A_4_5171948f",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.09774436090225565,
            "width": 0.6673151750972763,
            "height": 0.1804511278195489
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "Representative results from layers 2, 9, and 15 are shown in Figure 3, where the cluster structures are preserved.",
          "line": 154
        }
      ],
      "review_text": "Line 154: Figure 3 shows features from layers 2, 9, and 11, not layers 2, 9, and 15 as mentioned in the text.",
      "category": "figure-text",
      "description": "The text says Figure 3 shows results for layers [2, 9, 15], but the Figure shows layers [2, 9, 11]",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Representative results from layers 2, 9, and 15 are shown in Figure 3, where the cluster structures are preserved.",
          "correct": "thqPibDg6A_4_5171948f",
          "incorrect": [
            "thqPibDg6A_4_image_figure4",
            "thqPibDg6A_1_image_figure1",
            "thqPibDg6A_1_image_figure2"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"layers shown\",\"claim\":{\"source\":\"text\",\"statement\":\"one set of 3 layers\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"different set of 3 layers\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"layers shown\",\"claim\":{\"source\":\"text\",\"statement\":\"one set of 3 layers\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"one set of 2 layers\"}}",
            "{\"letter\":\"C\",\"attribute\":\"layers shown\",\"claim\":{\"source\":\"Figure 3\",\"statement\":\"one set of 3 layers\"},\"evidence\":{\"source\":\"text\",\"statement\":\"layers in wrong order\"}}",
            "{\"letter\":\"D\",\"attribute\":\"layers shown\",\"claim\":{\"source\":\"caption\",\"statement\":\"one layer\"},\"evidence\":{\"source\":\"Figure 3b\",\"statement\":\"one different layer\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"layers\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"change layer 11 to 15\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"layers\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add layer 15\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"layers\",\"target\":\"text\",\"other_involved\":\"figure_3\",\"action\":\"add\",\"edit_statement\":\"add layer 11 discussion\",\"reason\":\"omitted\"}",
            "{\"letter\":\"D\",\"attribute\":\"layer\",\"target\":\"figure_caption\",\"other_involved\":\"figure_3b\",\"action\":\"replace\",\"edit_statement\":\"replace 'layer 9' with 'layer 2'\",\"reason\":\"incorrect\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "t5mpbfpZuF": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "t5mpbfpZuF_4_1f4fdc72",
          "bbox": {
            "x": 0.17607003891050585,
            "y": 0.09774436090225565,
            "width": 0.6498054474708171,
            "height": 0.21954887218045113
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "More formally, given ns source and nt target examples, source and target embeddings es, et, and\nsource labels ys, we learn a main task head fmain and a domain critic head fcritic. Tne critic head\noutputs scores that attempt to discriminate between source and target embeddings, as given by the\nWasserstein distance loss, and is regularized with a gradient penalty. We offer an illustration of our\nmethod in Figure 2.",
          "line": 200
        }
      ],
      "review_text": "Figure 2 and Method Description (Lines 200-204): The terms 'DA Head' and 'Reward Head' in the figure do not directly match 'a main task head' and 'a domain critic head' in the text, causing difficulty in understanding their equivalence.",
      "category": "figure-text",
      "description": "The figure uses different naming for concepts than the text, making it hard to understand which part is what",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "More formally, given ns source and nt target examples, source and target embeddings es, et, and\nsource labels ys, we learn a main task head fmain and a domain critic head fcritic. Tne critic head\noutputs scores that attempt to discriminate between source and target embeddings, as given by the\nWasserstein distance loss, and is regularized with a gradient penalty. We offer an illustration of our\nmethod in Figure 2.",
          "correct": "t5mpbfpZuF_4_1f4fdc72",
          "incorrect": [
            "t5mpbfpZuF_1_image_figure1",
            "t5mpbfpZuF_7_image_figure3",
            "t5mpbfpZuF_8_image_figure4"
          ],
          "letters": ["B", "C", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"head names\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"inconsistent names\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"main task head\",\"claim\":{\"source\":\"text\",\"statement\":\"describes main task head\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"absent from diagram\"}}",
            "{\"letter\":\"B\",\"attribute\":\"heads\",\"claim\":{\"source\":\"text\",\"statement\":\"mentions three heads\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"illustrates two heads\"}}",
            "{\"letter\":\"D\",\"attribute\":\"domain critic head output\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should discriminate embeddings\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"outputs Source or Target\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"labels\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"name components\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"main task head\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"main task head\",\"reason\":\"absent\"}",
            "{\"letter\":\"B\",\"attribute\":\"heads\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"number of heads\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"DA Head outputs\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"output description\",\"reason\":\"contradicts\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "syUJqBnuD6": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "syUJqBnuD6_7_44550a75",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.09172932330827067,
            "width": 0.6692607003891051,
            "height": 0.3383458646616541
          }
        }
      ],
      "review_text": "Figure 2: The reviewer asks for clarification on what the light blue and orange colors represent in the zoom-in view, indicating a potential inconsistency in the figure's legend or description.",
      "category": "figure-caption",
      "description": "Caption does only explain blue color, but not yellow",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"color legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"explain all colors\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"omits yellow\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"color bar\",\"claim\":{\"source\":\"caption\",\"statement\":\"state numerical range\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"omits range\"}}",
            "{\"letter\":\"D\",\"attribute\":\"color legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"explain all colors\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"omits red\"}}",
            "{\"letter\":\"C\",\"attribute\":\"color meaning\",\"claim\":{\"source\":\"expectation\",\"statement\":\"explain color variation\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"omits red and blue explanation\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"yellow points\",\"target\":\"caption\",\"other_involved\":\"blue points\",\"action\":\"add\",\"edit_statement\":\"explanation\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"numerical range\",\"target\":\"figure_2\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"definition\",\"reason\":\"undefined\"}",
            "{\"letter\":\"D\",\"attribute\":\"red points\",\"target\":\"caption\",\"other_involved\":\"blue points\",\"action\":\"add\",\"edit_statement\":\"description\",\"reason\":\"omitted\"}",
            "{\"letter\":\"C\",\"attribute\":\"color meaning\",\"target\":\"caption\",\"other_involved\":\"figure_2\",\"action\":\"add\",\"edit_statement\":\"explanation\",\"reason\":\"unclear\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "sec09tLQUl": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "sec09tLQUl_4_5d2d220c",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.1037593984962406,
            "width": 0.6692607003891051,
            "height": 0.2796992481203007
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "Fig. 2 shows the average and worst group performance. It can be observed in Fig. 2, a discrepancy\nin generalization behaviors between the majority groups (represented by the average performance,\nbut the same behavior applies) and the minority group. Specifically, we observe a large general-\nization gap for the minority group, a synonym of overfitting\u2013a point that has not been sufficiently\nemphasized in prior research.",
          "line": 189
        }
      ],
      "review_text": "Figure 1 and 2: There is a mismatch between L189 and its labels, and it\u2019s incorrectly referenced as Fig 2.",
      "category": "figure-text",
      "description": "Figure does not show all information claimed in the text",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Fig. 2 shows the average and worst group performance. It can be observed in Fig. 2, a discrepancy\nin generalization behaviors between the majority groups (represented by the average performance,\nbut the same behavior applies) and the minority group. Specifically, we observe a large general-\nization gap for the minority group, a synonym of overfitting\u2013a point that has not been sufficiently\nemphasized in prior research.",
          "correct": "sec09tLQUl_4_5d2d220c",
          "incorrect": [
            "sec09tLQUl_4_image_figure3",
            "sec09tLQUl_5_image_figure4",
            "sec09tLQUl_5_image_figure5"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"figure content\",\"claim\":{\"source\":\"text\",\"statement\":\"average and worst group performance\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"does not contain information\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"figure content\",\"claim\":{\"source\":\"caption\",\"statement\":\"number of neurons\"},\"evidence\":{\"source\":\"Figure 2(a)\",\"statement\":\"probability of flipping\"}}",
            "{\"letter\":\"D\",\"attribute\":\"figure content\",\"claim\":{\"source\":\"expectation\",\"statement\":\"minority group's performance\"},\"evidence\":{\"source\":\"Figure 2(b)\",\"statement\":\"majority group\"}}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis label\",\"claim\":{\"source\":\"caption\",\"statement\":\"different metrics\"},\"evidence\":{\"source\":\"Figure 2(a) and Figure 2(b)\",\"statement\":\"same label\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance metrics\",\"target\":\"Figure 2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add information\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"number of neurons\",\"target\":\"caption of Figure 2(a)\",\"other_involved\":\"figure_2a\",\"action\":\"modify\",\"edit_statement\":\"update explanation\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"D\",\"attribute\":\"group displayed\",\"target\":\"Figure 2(b)\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"change group\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis label\",\"target\":\"Figure 2(a)\",\"other_involved\":\"Figure 2(b)\",\"action\":\"modify\",\"edit_statement\":\"change label\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "scozdyKzET": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "scozdyKzET_4_a5999df9",
          "bbox": {
            "x": 0.1663424124513619,
            "y": 0.0962406015037594,
            "width": 0.6731517509727626,
            "height": 0.3533834586466165
          }
        }
      ],
      "review_text": "Figure 1: The block output of Dispatcher Layer l has several rectangles and colors that are not defined in the notion. Furthermore, why do we have the same notations for the two first rectangles for Expert $k$, Expert $1$, and Expert $K$, which contradicts the equation (5)?",
      "category": "figure-only",
      "description": "The meaning of the colored outlined rectangles in the dynamic dispatching layer are not explained, while all other rectangles are",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should define colored outlined rectangles\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"does not define colored outlined rectangles\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should explain symbols\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"uses symbols without explanation\"}}",
            "{\"letter\":\"D\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should define yellow rectangles\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"uses yellow rectangles without definition\"}}",
            "{\"letter\":\"B\",\"attribute\":\"representation\",\"claim\":{\"source\":\"legend\",\"statement\":\"defines light blue rectangles as Accumulated Prompt Tokens\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"does not show light blue rectangles\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"rectangles\",\"target\":\"figure_1\",\"other_involved\":\"legend\",\"action\":\"add\",\"edit_statement\":\"meaning\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"multiplication (\u00d7) and addition (\u2295) symbols\",\"target\":\"legend\",\"other_involved\":\"figure_1\",\"action\":\"add\",\"edit_statement\":\"explanation\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"Dispatching Weights D_{expert_k}\",\"target\":\"legend\",\"other_involved\":\"figure_1\",\"action\":\"add\",\"edit_statement\":\"explanation\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"Accumulated Prompt Tokens Z_{P_l}\",\"target\":\"figure_1\",\"other_involved\":\"legend\",\"action\":\"add\",\"edit_statement\":\"visual representation\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "sELO2DCCC1": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "sELO2DCCC1_3_6a8f1e8e",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.08872180451127819,
            "width": 0.6712062256809338,
            "height": 0.23007518796992482
          }
        }
      ],
      "review_text": "Figure 1: The caption states it's a uniform field with 16x16 patches, but the field is clearly not uniform.",
      "category": "figure-caption",
      "description": "The distribution shown in the figure is clearly not uniform as described in the caption",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"uniformity\",\"claim\":{\"source\":\"caption\",\"statement\":\"uniform 2Wm^-2 TOA forcing field\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"non-uniform distribution\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"consistency\",\"claim\":{\"source\":\"expectation\",\"statement\":\"static forcing field\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"varying TOA forcing\"}}",
            "{\"letter\":\"C\",\"attribute\":\"continuity\",\"claim\":{\"source\":\"caption\",\"statement\":\"256 discrete local patches\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"continuous bands\"}}",
            "{\"letter\":\"B\",\"attribute\":\"uniformity\",\"claim\":{\"source\":\"caption\",\"statement\":\"uniform distribution\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"non-uniform distribution\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"forcing field\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"map distribution\",\"reason\":\"not uniform\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"forcing field\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"show static\",\"reason\":\"not static\"}",
            "{\"letter\":\"C\",\"attribute\":\"local patches\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"replace\",\"edit_statement\":\"continuous bands\",\"reason\":\"not discrete\"}",
            "{\"letter\":\"B\",\"attribute\":\"uniform distribution\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"show non-uniform\",\"reason\":\"not uniform\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "rtUjj03qZv": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "rtUjj03qZv_3_0864fdcd",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.09473684210526315,
            "width": 0.6653696498054475,
            "height": 0.3218045112781955
          }
        }
      ],
      "review_text": "2. In Figure 2, in the 'contrastive-enhanced answer generation' module, why does the 'exclude' branch link to the generation decoder? This contradicts the explanation in the text where it's mentioned that the 'exclude' branch is used to 'help the model focus on the relevant information'.",
      "category": "figure-only",
      "description": "In the contrastive-enhanced answer generation, the exclude branch still goes into the decoder",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"branch connection\",\"claim\":{\"source\":\"expectation\",\"statement\":\"excludes features\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"connected to decoder\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"feature count\",\"claim\":{\"source\":\"expectation\",\"statement\":\"matches input\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"does not match input\"}}",
            "{\"letter\":\"B\",\"attribute\":\"branch connection\",\"claim\":{\"source\":\"expectation\",\"statement\":\"excludes features\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"connected to encoder\"}}",
            "{\"letter\":\"D\",\"attribute\":\"operation\",\"claim\":{\"source\":\"caption\",\"statement\":\"concatenation\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"cross product\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"output connection\",\"target\":\"Exclude branch\",\"other_involved\":\"Decoder, Expand branch\",\"action\":\"modify\",\"edit_statement\":\"output connection\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"number of features\",\"target\":\"Expand branch\",\"other_involved\":\"Cross-model Encoder\",\"action\":\"modify\",\"edit_statement\":\"number of features\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"B\",\"attribute\":\"output connection\",\"target\":\"Exclude branch\",\"other_involved\":\"Encoder, Expand branch\",\"action\":\"modify\",\"edit_statement\":\"output connection\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"\u2295 symbol\",\"target\":\"caption\",\"other_involved\":\"Temporal Grounding module of figure 2\",\"action\":\"modify\",\"edit_statement\":\"symbol definition\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 2"]
    }
  ],
  "r6XqXoRT6N": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "r6XqXoRT6N_4_9193e9a7",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.08721804511278196,
            "width": 0.6653696498054475,
            "height": 0.2661654135338346
          }
        }
      ],
      "review_text": "Figure 2: The team name 'Warriors' is misspelled as 'VAARR', and Stephen Curry\u2019s jersey number, which should be 30, is incorrect.",
      "category": "figure-only",
      "description": " 'Warriors' is misspelled as 'VAARR', and Stephen Curry\u2019s jersey number, which should be 30, is incorrect.",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"text\",\"claim\":{\"source\":\"expectation\",\"statement\":\"Warriors\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"VAARR\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"number\",\"claim\":{\"source\":\"expectation\",\"statement\":\"30\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"24\"}}",
            "{\"letter\":\"C\",\"attribute\":\"bounding box\",\"claim\":{\"source\":\"expectation\",\"statement\":\"fit jersey\"},\"evidence\":{\"source\":\"Figure 2 Step 3\",\"statement\":\"overflow jersey\"}}",
            "{\"letter\":\"A\",\"attribute\":\"name\",\"claim\":{\"source\":\"caption\",\"statement\":\"Stephen Curry\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"Stephen Russell\"}}"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"text\",\"target\":\"jersey\",\"other_involved\":\"jersey number\",\"action\":\"modify\",\"edit_statement\":\"Warriors spelling\",\"reason\":\"misspelled\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"jersey number\",\"target\":\"jersey\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"correct jersey number\",\"reason\":\"incorrect\"}",
            "{\"letter\":\"C\",\"attribute\":\"bounding box\",\"target\":\"figure_2/step_3\",\"other_involved\":null,\"action\":\"reposition\",\"edit_statement\":\"align logo\",\"reason\":\"overflow\"}",
            "{\"letter\":\"A\",\"attribute\":\"name\",\"target\":\"jersey\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"Stephen Curry name\",\"reason\":\"different\"}"
          ],
          "letters": ["D", "B", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "r6XqXoRT6N_6_37e2c28c",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.28270676691729324,
            "width": 0.6653696498054475,
            "height": 0.11578947368421053
          }
        },
        {
          "type": "text",
          "page": 9,
          "content": "w/o text module. For w/o text module, the ablation experiment on MHaluBench dataset aimed to\nexamine the impact of removing the text generation module in our model. The results, shown in\nTable 3, highlight that without the text generation module, the model faced challenges in generating\naccurate text",
          "line": 482
        }
      ],
      "review_text": "Table 3: The OH ACC in the first column is unexpectedly higher when the text module is omitted, and the removal of KG extraction does not seem to improve TH or FH ACC.",
      "category": "figure-text",
      "description": "OH ACC is higher w/o text module in the table, while text says it is lower",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "w/o text module. For w/o text module, the ablation experiment on MHaluBench dataset aimed to\nexamine the impact of removing the text generation module in our model. The results, shown in\nTable 3, highlight that without the text generation module, the model faced challenges in generating\naccurate text",
          "correct": "r6XqXoRT6N_6_37e2c28c",
          "incorrect": [
            "r6XqXoRT6N_5_table_table2",
            "r6XqXoRT6N_6_image_figure4",
            "r6XqXoRT6N_6_image_figure3"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"OH Acc.(%)\",\"claim\":{\"source\":\"text\",\"statement\":\"leads to challenges\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"higher than ours\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"text generation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"not possible\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"shows results\"}}",
            "{\"letter\":\"B\",\"attribute\":\"TFH Acc.(%)\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't be 0.00%\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"0.00%\"}}",
            "{\"letter\":\"A\",\"attribute\":\"Overall Acc.(%)\",\"claim\":{\"source\":\"text\",\"statement\":\"leads to challenges\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"higher than ours\"}}"
          ],
          "letters": ["D", "C", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"OH Acc.(%)\",\"target\":\"table_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"results\",\"target\":\"table_3\",\"other_involved\":\"text module\",\"action\":\"remove\",\"edit_statement\":\"entry\",\"reason\":\"impossible\"}",
            "{\"letter\":\"B\",\"attribute\":\"TFH Acc.(%)\",\"target\":\"table_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"A\",\"attribute\":\"Overall Acc.(%)\",\"target\":\"table_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["D", "C", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3"]
    }
  ],
  "r0JfDTXAWx": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 24,
          "image_id": "r0JfDTXAWx_24_e5fe0b34",
          "bbox": {
            "x": 0.17412451361867703,
            "y": 0.10075187969924813,
            "width": 0.6595330739299611,
            "height": 0.12481203007518797
          }
        },
        {
          "type": "image",
          "page": 24,
          "image_id": "r0JfDTXAWx_24_a3b0ddab",
          "bbox": {
            "x": 0.17607003891050585,
            "y": 0.2428821993949718,
            "width": 0.6556420233463035,
            "height": 0.12781954887218044
          }
        }
      ],
      "review_text": "Tables 16 and 17: Both tables share the same result, even when changing the 2-hop EG to 3-hop EG.",
      "category": "table-table",
      "description": "Both tables show the same results for the two last columns, even though they are 2-hop resp. 3-hop",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "r0JfDTXAWx_24_e5fe0b34",
          "correct": "r0JfDTXAWx_24_a3b0ddab",
          "incorrect": [
            "r0JfDTXAWx_23_table_table16",
            "r0JfDTXAWx_18_table_table14",
            "r0JfDTXAWx_18_table_table13"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should vary\"},\"evidence\":{\"source\":\"Table 16 and Table 17\",\"statement\":\"are identical\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"AUC-PR values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Table 16 and Table 17\",\"statement\":\"are different\"}}",
            "{\"letter\":\"B\",\"attribute\":\"units\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be provided\"},\"evidence\":{\"source\":\"Table 16 and Table 17\",\"statement\":\"not provided\"}}",
            "{\"letter\":\"D\",\"attribute\":\"percentage improvements\",\"claim\":{\"source\":\"caption\",\"statement\":\"are identical\"},\"evidence\":{\"source\":\"Table 16 and Table 17\",\"statement\":\"are identical\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance values\",\"target\":\"table_16\",\"other_involved\":\"table_17\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"identical\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"AUC-PR values\",\"target\":\"table_16\",\"other_involved\":\"table_17\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"units\",\"target\":\"table_16\",\"other_involved\":\"table_17\",\"action\":\"add\",\"edit_statement\":\"missing units\",\"reason\":\"not provided\"}",
            "{\"letter\":\"D\",\"attribute\":\"percentage improvements\",\"target\":\"table_16\",\"other_involved\":\"table_17\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"identical\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 16", "Table 17"]
    }
  ],
  "qW5f8TAZ4J": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "qW5f8TAZ4J_8_64db0d51",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.10225563909774436,
            "width": 0.6634241245136187,
            "height": 0.1729323308270677
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "we propose FairSkin, a novel DM framework that mitigates these biases through a three-level resampling mechanism, ensuring fairer representation across racial and disease categories.",
          "line": 19
        }
      ],
      "review_text": "1) The main claim of the paper is that it improves fairness by balancing out the lack of high quality training data of darker skin-types in the dataset. But the results in Fig 4 show that the accuracy increases for 'Caucasian' and decreases for 'Asian' and 'African' compared to the vanilla approach.",
      "category": "figure-text",
      "description": "The paper claims to improve fairness, but the accuracy decreases for African and Asian skin types",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "we propose FairSkin, a novel DM framework that mitigates these biases through a three-level resampling mechanism, ensuring fairer representation across racial and disease categories.",
          "correct": "qW5f8TAZ4J_8_64db0d51",
          "incorrect": [
            "qW5f8TAZ4J_7_image_figure6",
            "qW5f8TAZ4J_7_image_figure5",
            "qW5f8TAZ4J_6_image_figure3"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"enhance fairness\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"decrease for African and Asian\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"validation accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"targeted fairness improvement\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"lower overall accuracy\"}}",
            "{\"letter\":\"B\",\"attribute\":\"performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"enhance fairness\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"outperform on Caucasian\"}}",
            "{\"letter\":\"A\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"enhance fairness\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"Asian accuracy higher than African\"}}"
          ],
          "letters": ["D", "C", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"accuracy\",\"target\":\"figure_4\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align claim\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"validation accuracy\",\"target\":\"figure_4\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align claim\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"B\",\"attribute\":\"performance\",\"target\":\"figure_4\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align claim\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"A\",\"attribute\":\"accuracy\",\"target\":\"figure_4\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align claim\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["D", "C", "B", "A"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 4"]
    }
  ],
  "qIJenSdGbW": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "qIJenSdGbW_8_b1e3e0b1",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.0962406015037594,
            "width": 0.6614785992217899,
            "height": 0.1729323308270677
          }
        }
      ],
      "review_text": "Table 3: Only one metric (ImageReward) supports the argument that NPNet is orthogonal to DPO, while the other three metrics do not.",
      "category": "figure-caption",
      "description": "Caption claims DPO and NPNet are orthogonal, but only ImageReward results support this, the other metrics do not",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"performance improvement\",\"claim\":{\"source\":\"caption\",\"statement\":\"improved performance\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"AES metric decrease\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"orthogonality claim\",\"claim\":{\"source\":\"caption\",\"statement\":\"orthogonality\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"negligible increase\"}}",
            "{\"letter\":\"C\",\"attribute\":\"orthogonality\",\"claim\":{\"source\":\"expectation\",\"statement\":\"methods not orthogonal\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"higher ImageReward\"}}",
            "{\"letter\":\"D\",\"attribute\":\"arrow direction\",\"claim\":{\"source\":\"expectation\",\"statement\":\"higher is better\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"not highest value\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"AES metric\",\"target\":\"table_3\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"update value\",\"reason\":\"decrease observed\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"PickScore and HPSv2 metrics\",\"target\":\"table_3\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"update values\",\"reason\":\"negligible increase\"}",
            "{\"letter\":\"C\",\"attribute\":\"ImageReward metric\",\"target\":\"table_3\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"update value\",\"reason\":\"significantly higher\"}",
            "{\"letter\":\"D\",\"attribute\":\"PickScore value\",\"target\":\"table_3\",\"other_involved\":\"arrows\",\"action\":\"modify\",\"edit_statement\":\"update value\",\"reason\":\"not highest\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3"]
    }
  ],
  "pshLnZzIbW": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "pshLnZzIbW_2_f2d07c41",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.3067669172932331,
            "width": 0.7042801556420234,
            "height": 0.2330827067669173
          }
        }
      ],
      "review_text": "Table 1: The best results are said to be bold, but only the results in this work are bold. The results for $1/e-\\epsilon$, $O(\\log n)$, and $O(n)$ should also be bold.",
      "category": "table-only",
      "description": "The best results should be bold, but only the \"this work\" is bold",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"bolding rule\",\"claim\":{\"source\":\"caption\",\"statement\":\"best result is bold\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not consistently applied\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should indicate best result\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"every this work is bold\"}}",
            "{\"letter\":\"C\",\"attribute\":\"notation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be O-notation\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"uses different notation\"}}",
            "{\"letter\":\"D\",\"attribute\":\"ratio values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be correct\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"values are incorrect\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"bolding\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"make consistent\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"bolding\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"remove unnecessary\",\"reason\":\"too many bolded\"}",
            "{\"letter\":\"C\",\"attribute\":\"notation\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"replace\",\"edit_statement\":\"standard O-notation\",\"reason\":\"unconventional notation\"}",
            "{\"letter\":\"D\",\"attribute\":\"Ratio column values\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"correct approximation\",\"reason\":\"incorrect\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "pWdUcV5axb": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "pWdUcV5axb_10_2e092a7c",
          "bbox": {
            "x": 0.17728237791932058,
            "y": 0.17443609022556392,
            "width": 0.6475583864118897,
            "height": 0.07969924812030074
          }
        },
        {
          "type": "text",
          "page": 10,
          "content": "Table 8 shows that fine-tuning on our expanded VH test cases maintains the model\u2019s performance on other general-purpose VQA datasets, MME Perception and MME Recognition",
          "line": 565
        }
      ],
      "review_text": "Table 8: The performance on MME dataset is harmed after fine-tuning on expanded VH test cases, contradicting the authors' interpretation that 'the model\u2019s performance are maintained'.",
      "category": "table-text",
      "description": "Performance is not kept, but it decreases",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Table 8 shows that fine-tuning on our expanded VH test cases maintains the model\u2019s performance on other general-purpose VQA datasets, MME Perception and MME Recognition",
          "correct": "pWdUcV5axb_10_2e092a7c",
          "incorrect": [
            "pWdUcV5axb_9_table_table7",
            "pWdUcV5axb_8_table_table6",
            "pWdUcV5axb_5_table_table3"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"performance\",\"claim\":{\"source\":\"text\",\"statement\":\"maintains performance\"},\"evidence\":{\"source\":\"Table 8\",\"statement\":\"decreased\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"scores\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't increase\"},\"evidence\":{\"source\":\"Table 8\",\"statement\":\"increased\"}}",
            "{\"letter\":\"B\",\"attribute\":\"performance\",\"claim\":{\"source\":\"text\",\"statement\":\"maintaining performance\"},\"evidence\":{\"source\":\"Table 8\",\"statement\":\"significantly improves\"}}",
            "{\"letter\":\"A\",\"attribute\":\"content\",\"claim\":{\"source\":\"text\",\"statement\":\"before and after fine-tuning\"},\"evidence\":{\"source\":\"Table 8\",\"statement\":\"after fine-tuning\"}}"
          ],
          "letters": ["D", "C", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"model performance\",\"target\":\"text\",\"other_involved\":\"table_8\",\"action\":\"modify\",\"edit_statement\":\"describe score decrease\",\"reason\":\"contradicts\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"MME Cognition scores\",\"target\":\"text\",\"other_involved\":\"table_8\",\"action\":\"modify\",\"edit_statement\":\"update MME Cognition scores\",\"reason\":\"contradicts\"}",
            "{\"letter\":\"B\",\"attribute\":\"performance improvement\",\"target\":\"text\",\"other_involved\":\"table_8\",\"action\":\"modify\",\"edit_statement\":\"describe significant improvement\",\"reason\":\"contradicts\"}",
            "{\"letter\":\"A\",\"attribute\":\"fine-tuning scores\",\"target\":\"table_8\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"before fine-tuning scores\",\"reason\":\"missing\"}"
          ],
          "letters": ["D", "C", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 8"]
    }
  ],
  "pQJi9EsmCc": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "pQJi9EsmCc_8_ebe434aa",
          "bbox": {
            "x": 0.20136186770428016,
            "y": 0.09924812030075188,
            "width": 0.6050583657587548,
            "height": 0.25263157894736843
          }
        },
        {
          "type": "image",
          "page": 8,
          "image_id": "pQJi9EsmCc_8_949f1b9c",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.656416033981438,
            "width": 0.6614785992217899,
            "height": 0.15639097744360902
          }
        }
      ],
      "review_text": "Fig. 3: The qualitative improvement shown does not align with the quantitative results in Table 1, especially for the red boxes where the results are mostly comparable.",
      "category": "figure-table",
      "description": "The table shows significant differences in L_1 distance, but the visual inspection in the figure does not show almost any difference",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "pQJi9EsmCc_8_ebe434aa",
          "correct": "pQJi9EsmCc_8_949f1b9c",
          "incorrect": [
            "pQJi9EsmCc_8_image_figure4",
            "pQJi9EsmCc_7_image_figure3",
            "pQJi9EsmCc_9_image_figure5"
          ],
          "letters": ["D", "C", "B", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"qualitative vs quantitative results\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should agree\"},\"evidence\":{\"source\":\"table_1_and_figure_3\",\"statement\":\"disagree\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"masking condition comparison\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should compare\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"limited comparison\"}}",
            "{\"letter\":\"B\",\"attribute\":\"ground truth alignment\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should align\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"not aligned\"}}",
            "{\"letter\":\"C\",\"attribute\":\"bounding box consistency\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"inconsistent\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"method performance\",\"target\":\"figure_3\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"qualitative representation\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"reconstructions shown\",\"target\":\"figure_3\",\"other_involved\":\"table_1\",\"action\":\"add\",\"edit_statement\":\"w/o mask reconstructions\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"alignment\",\"target\":\"figure_3\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align Ground truth\",\"reason\":\"misaligned\"}",
            "{\"letter\":\"C\",\"attribute\":\"red bounding box\",\"target\":\"figure_3\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"standardize region\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3", "Table 1"]
    }
  ],
  "pK3oe2bubc": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "pK3oe2bubc_7_74b21c7b",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.15639097744360902,
            "width": 0.6634241245136187,
            "height": 0.2796992481203007
          }
        },
        {
          "type": "text",
          "page": 6,
          "content": "Our LayerShuffle (LS) approaches show slightly lower performance than the baselines when executing layers in their original order.",
          "line": 285
        }
      ],
      "review_text": "Table 1: The performance degradation with LayerShuffle is over 28% on the simple classification task (CIFAR, sequential, LS-pred), which contradicts the statement in Line 285 that the model performance with LayerShuffle is 'slightly' lower.",
      "category": "table-text",
      "description": "The layer shuffle method significantly decreases accuracy, not just \"slightly\"",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Our LayerShuffle (LS) approaches show slightly lower performance than the baselines when executing layers in their original order.",
          "correct": "pK3oe2bubc_7_74b21c7b",
          "incorrect": [
            "pK3oe2bubc_9_table_table2",
            "pK3oe2bubc_7_image_figure4",
            "pK3oe2bubc_4_image_figure3"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"substantial reduction\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"substantial reduction\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"caption\",\"statement\":\"fail catastrophically\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"above 60%\"}}",
            "{\"letter\":\"A\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"caption\",\"statement\":\"above 60%\"},\"evidence\":{\"source\":\"text\",\"statement\":\"slightly lower\"}}",
            "{\"letter\":\"B\",\"attribute\":\"performance\",\"claim\":{\"source\":\"text\",\"statement\":\"slightly lower\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"outperforms baselines\"}}"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"LayerShuffle performance\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align accuracy reduction\",\"reason\":\"contradictory\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"baseline model performance\",\"target\":\"caption\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align performance\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"A\",\"attribute\":\"LayerShuffle accuracy\",\"target\":\"caption\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align accuracy\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"B\",\"attribute\":\"LayerShuffle performance\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align performance\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["D", "C", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "p3NVJg6ywM": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "p3NVJg6ywM_8_4c1c2f65",
          "bbox": {
            "x": 0.14105058365758755,
            "y": 0.1218045112781955,
            "width": 0.7217898832684825,
            "height": 0.21954887218045113
          }
        },
        {
          "type": "image",
          "page": 8,
          "image_id": "p3NVJg6ywM_8_e90d2965",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.35566415428218984,
            "width": 0.6750972762645914,
            "height": 0.12330827067669173
          }
        }
      ],
      "review_text": "Table 1 and Table 2: The data in these tables seem to be inconsistent.",
      "category": "table-table",
      "description": "The values for DENSE are inconsistent across the two tables",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "p3NVJg6ywM_8_4c1c2f65",
          "correct": "p3NVJg6ywM_8_e90d2965",
          "incorrect": [
            "p3NVJg6ywM_7_table_table1",
            "p3NVJg6ywM_5_interline-equation_equation13.5",
            "p3NVJg6ywM_5_interline-equation_equation5"
          ],
          "letters": ["B", "C", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"accuracy values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"different\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"omega values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"different\"}}",
            "{\"letter\":\"C\",\"attribute\":\"dataset\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"different\"}}",
            "{\"letter\":\"D\",\"attribute\":\"standard deviation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"different\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"accuracy values\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"\u03c9 values\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"dataset\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"add\",\"edit_statement\":\"add dataset\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"standard deviation\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"higher\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1", "Table 2"]
    }
  ],
  "owR9ofvkFQ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "owR9ofvkFQ_6_201bab08",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.09924812030075188,
            "width": 0.6614785992217899,
            "height": 0.2586466165413534
          }
        },
        {
          "type": "text",
          "page": 6,
          "content": "The MathOdyssey dataset includes a variety of answer types, provid-\ning a comprehensive assessment of the mathematical reasoning and problem-solving capabilities of\nlarge language models (LLMs). The distribution of answer types is shown in Figure 2, and it is cat-\negorized into three main types: True-False questions, Multiple-Choice questions, and Open-Answer\nquestions. The distribution of answer types in the MathOdyssey dataset is designed to provide a\nwell-rounded evaluation of LLMs\u2019 mathematical capabilities. With 62.8% of the questions being\nopen-answer, the dataset emphasizes the importance of detailed reasoning and solution generation.\nMultiple-choice questions, making up 33.1%, help assess the models\u2019 ability to choose correct an-\nswers from given options, while true-false questions, at 4.1%, provide a quick check of fundamental\nunderstanding. This diverse mix of answer types ensures that LLMs are tested on various aspects of\nmathematical problem-solving, from basic validation to complex reasoning and solution generation,\nrequiring an understanding of the concepts.",
          "line": 299
        }
      ],
      "review_text": "Figure 2: The data presented in the figure differs from the description in line 304 regarding the diversity of answer types.",
      "category": "figure-text",
      "description": "The values in the text do not match the pie diagram",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "The MathOdyssey dataset includes a variety of answer types, provid-\ning a comprehensive assessment of the mathematical reasoning and problem-solving capabilities of\nlarge language models (LLMs). The distribution of answer types is shown in Figure 2, and it is cat-\negorized into three main types: True-False questions, Multiple-Choice questions, and Open-Answer\nquestions. The distribution of answer types in the MathOdyssey dataset is designed to provide a\nwell-rounded evaluation of LLMs\u2019 mathematical capabilities. With 62.8% of the questions being\nopen-answer, the dataset emphasizes the importance of detailed reasoning and solution generation.\nMultiple-choice questions, making up 33.1%, help assess the models\u2019 ability to choose correct an-\nswers from given options, while true-false questions, at 4.1%, provide a quick check of fundamental\nunderstanding. This diverse mix of answer types ensures that LLMs are tested on various aspects of\nmathematical problem-solving, from basic validation to complex reasoning and solution generation,\nrequiring an understanding of the concepts.",
          "correct": "owR9ofvkFQ_6_201bab08",
          "incorrect": [
            "owR9ofvkFQ_4_image_figure1",
            "owR9ofvkFQ_7_table_table3",
            "owR9ofvkFQ_7_table_table4"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"percentage\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent\"},\"evidence\":{\"source\":\"text and figure_2\",\"statement\":\"inconsistent for Open-Answer and Multiple-Choice\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"percentage for True-False\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent\"},\"evidence\":{\"source\":\"text and figure_2\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"A\",\"attribute\":\"total percentage\",\"claim\":{\"source\":\"expectation\",\"statement\":\"adds up to 100%\"},\"evidence\":{\"source\":\"text and figure_2\",\"statement\":\"does not add up to 100%\"}}",
            "{\"letter\":\"C\",\"attribute\":\"categories\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent\"},\"evidence\":{\"source\":\"text and figure_2\",\"statement\":\"different categories\"}}"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"percentage\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"match values\",\"reason\":\"stated differently\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"percentage\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"match value\",\"reason\":\"reported differently\"}",
            "{\"letter\":\"A\",\"attribute\":\"total percentage\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"add up to 100%\",\"reason\":\"not adding up\"}",
            "{\"letter\":\"C\",\"attribute\":\"answer categories\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"match categories\",\"reason\":\"different categories\"}"
          ],
          "letters": ["B", "D", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "ow51wrwVtI": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "ow51wrwVtI_10_12bc79f1",
          "bbox": {
            "x": 0.19163424124513617,
            "y": 0.09473684210526315,
            "width": 0.6206225680933852,
            "height": 0.20601503759398496
          }
        }
      ],
      "review_text": "Figure 7: SAM-Free outperforms TFCounter on the BIKE-1000 dataset when the number of objects exceeds 15, contradicting the paper's claim of consistent superiority.",
      "category": "figure-text",
      "description": "The Figure 7 (Right) is supposed to show Log Scale y-axis, but it is linear.",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"y-axis scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be log scale\"},\"evidence\":{\"source\":\"right graph\",\"statement\":\"linear scale\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"y-axis scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be log scale\"},\"evidence\":{\"source\":\"left graph\",\"statement\":\"linearly spaced\"}}",
            "{\"letter\":\"D\",\"attribute\":\"x-axis labels\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"graphs\",\"statement\":\"inconsistent categories\"}}",
            "{\"letter\":\"C\",\"attribute\":\"horizontal line spacing\",\"claim\":{\"source\":\"caption\",\"statement\":\"log scale\"},\"evidence\":{\"source\":\"graphs\",\"statement\":\"differently spaced\"}}"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"y-axis\",\"target\":\"figure_7_right\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"scale to log\",\"reason\":\"linear scale\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"y-axis\",\"target\":\"figure_7_left\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"make space ticks log scale\",\"reason\":\"data is in log scale\"}",
            "{\"letter\":\"D\",\"attribute\":\"x-axis labels\",\"target\":\"figure_7_right\",\"other_involved\":\"figure_7_left\",\"action\":\"modify\",\"edit_statement\":\"categorize labels the same\",\"reason\":\"inconsistent categorization\"}",
            "{\"letter\":\"C\",\"attribute\":\"horizontal lines\",\"target\":\"figure_7_right\",\"other_involved\":\"figure_7_left\",\"action\":\"modify\",\"edit_statement\":\"space lines the same\",\"reason\":\"different spacing\"}"
          ],
          "letters": ["B", "A", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 7"]
    }
  ],
  "oqRe1KvD17": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "oqRe1KvD17_8_f79cf62d",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.11278195488721804,
            "width": 0.6692607003891051,
            "height": 0.24210526315789474
          }
        }
      ],
      "review_text": "Table 3: The baseline without RAG for the GPT-3.5 turbo is not reported, making it difficult to compare the results with other baseline models.",
      "category": "table-only",
      "description": "The gpt-3-5-turbo RAG does not have a non-RAG baseline to compare to",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"model comparison\",\"claim\":{\"source\":\"caption\",\"statement\":\"listed in section\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"no baseline\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"model comparison\",\"claim\":{\"source\":\"caption\",\"statement\":\"RAG model shown\"},\"evidence\":{\"source\":\"Without Retrieval-Augmented Generation section\",\"statement\":\"no non-RAG entry\"}}",
            "{\"letter\":\"D\",\"attribute\":\"model comparison\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should have RAG version\"},\"evidence\":{\"source\":\"Without Retrieval-Augmented Generation section\",\"statement\":\"no RAG version\"}}",
            "{\"letter\":\"B\",\"attribute\":\"model comparison\",\"claim\":{\"source\":\"caption\",\"statement\":\"RAG version listed\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"no original performance\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"model entry\",\"target\":\"table_3\",\"other_involved\":\"GPT-3.5-turbo-1106 RAG (ours)\",\"action\":\"add\",\"edit_statement\":\"add missing non-RAG baseline\",\"reason\":\"incomplete\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"model entry\",\"target\":\"table_3\",\"other_involved\":\"GPT-4-0613 RAG (ours)\",\"action\":\"add\",\"edit_statement\":\"add missing non-RAG entry\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"D\",\"attribute\":\"RAG version\",\"target\":\"table_3\",\"other_involved\":\"Mixtral 8*7B\",\"action\":\"add\",\"edit_statement\":\"add missing RAG version\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"B\",\"attribute\":\"performance\",\"target\":\"table_3\",\"other_involved\":\"GPT-3.5 (OpenAI, 2022)\",\"action\":\"add\",\"edit_statement\":\"add missing non-RAG performance\",\"reason\":\"incomplete\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3"]
    }
  ],
  "opSPgPIwAD": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "opSPgPIwAD_2_9ce76f35",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.09924812030075188,
            "width": 0.6673151750972763,
            "height": 0.43909774436090226
          }
        }
      ],
      "review_text": "Figure 1: The authors use different starting points for FACE and their algorithm, making the comparison inconsistent and not comparable.",
      "category": "figure-only",
      "description": "The FACE uses a different starting point than the author's implementation, which does not allow for direct comparison",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"starting point\",\"claim\":{\"source\":\"expectation\",\"statement\":\"same start\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"different start\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"path focus\",\"claim\":{\"source\":\"expectation\",\"statement\":\"addresses failure\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"addresses success\"}}",
            "{\"letter\":\"A\",\"attribute\":\"path construction\",\"claim\":{\"source\":\"expectation\",\"statement\":\"demonstrate failure\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"only success\"}}",
            "{\"letter\":\"C\",\"attribute\":\"image difference\",\"claim\":{\"source\":\"expectation\",\"statement\":\"different paths\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"same path\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"recourse path starting point\",\"target\":\"figure_1b\",\"other_involved\":\"figure_1c\",\"action\":\"modify\",\"edit_statement\":\"match starting point\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"path finding focus\",\"target\":\"figure_1c\",\"other_involved\":\"figure_1b\",\"action\":\"modify\",\"edit_statement\":\"address points initially without recourse\",\"reason\":\"different\"}",
            "{\"letter\":\"A\",\"attribute\":\"path-based algorithm results\",\"target\":\"figure_1c\",\"other_involved\":\"figure_1b\",\"action\":\"modify\",\"edit_statement\":\"show success, failure\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"C\",\"attribute\":\"path illustration\",\"target\":\"figure_1b\",\"other_involved\":\"figure_1c\",\"action\":\"modify\",\"edit_statement\":\"match cropping, path\",\"reason\":\"different\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "oW7T3p5wE1": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "oW7T3p5wE1_4_6b487805",
          "bbox": {
            "x": 0.3433852140077821,
            "y": 0.5157894736842105,
            "width": 0.490272373540856,
            "height": 0.07218045112781955
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "To better adapt to\nthe characteristics of Self-Attention, we also measure the distance between tokens using a method\nsimilar to dot product. Specifically, we calculate the cosine similarity between the cluster center\nand each token, and then sort the tokens according to the magnitude of the computed results. The\nspecific process is shown in Eq. 2.",
          "line": 186
        }
      ],
      "review_text": "L.188: The text states that the magnitude of the sim score is used, which contradicts Eq. 2 that shows the sim score itself is used. Which one is correct?",
      "category": "equation-text",
      "description": "The text says the magnitude of similarity score is used, but the equation shows the similarity score itself being used",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the equation consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the equation inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "To better adapt to\nthe characteristics of Self-Attention, we also measure the distance between tokens using a method\nsimilar to dot product. Specifically, we calculate the cosine similarity between the cluster center\nand each token, and then sort the tokens according to the magnitude of the computed results. The\nspecific process is shown in Eq. 2.",
          "correct": "oW7T3p5wE1_4_6b487805",
          "incorrect": [
            "oW7T3p5wE1_3_interline-equation_equation45",
            "oW7T3p5wE1_3_interline-equation_equation27.5",
            "oW7T3p5wE1_3_interline-equation_equation37.5"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"sorting\",\"claim\":{\"source\":\"text\",\"statement\":\"magnitude\"},\"evidence\":{\"source\":\"equation_2\",\"statement\":\"raw values\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"similarity\",\"claim\":{\"source\":\"expectation\",\"statement\":\"cosine similarity\"},\"evidence\":{\"source\":\"equation_2\",\"statement\":\"not cosine similarity\"}}",
            "{\"letter\":\"A\",\"attribute\":\"metric\",\"claim\":{\"source\":\"text\",\"statement\":\"distance metric\"},\"evidence\":{\"source\":\"equation_2\",\"statement\":\"similarity\"}}",
            "{\"letter\":\"D\",\"attribute\":\"sorting\",\"claim\":{\"source\":\"expectation\",\"statement\":\"explicit sorting\"},\"evidence\":{\"source\":\"equation_2\",\"statement\":\"argsort\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"sorting mechanism\",\"target\":\"text\",\"other_involved\":\"equation_2\",\"action\":\"modify\",\"edit_statement\":\"magnitude of scores\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"cosine similarity\",\"target\":\"text\",\"other_involved\":\"equation_2\",\"action\":\"modify\",\"edit_statement\":\"formula\",\"reason\":\"not standard\"}",
            "{\"letter\":\"A\",\"attribute\":\"sorting tokens\",\"target\":\"text\",\"other_involved\":\"equation_2\",\"action\":\"modify\",\"edit_statement\":\"distance metric\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"idx calculation\",\"target\":\"equation_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"explicit sorting\",\"reason\":\"implied direct\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["(2)"]
    }
  ],
  "nxZbKWhUeZ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "nxZbKWhUeZ_8_a7fb73c6",
          "bbox": {
            "x": 0.17412451361867703,
            "y": 0.10977443609022557,
            "width": 0.6595330739299611,
            "height": 0.15338345864661654
          }
        },
        {
          "type": "text",
          "page": 8,
          "content": "Table 1 reports the image segmentation results\non three widely-used common datasets: Cityscapes, Mapillary Vistas, and ADE20K. HoughPL\ndemonstrates significant performance gains over the baseline across these datasets. Specifically,\nHoughPL outperforms the state-of-the-art methods by 5.5 in mIoU for semantic segmentation, 5.5 in\nAP for instance segmentation, and 5.5 in PQ for panoptic segmentation on average",
          "line": 404
        }
      ],
      "review_text": "Table 1: The authors claim that HoughPL outperforms the SOTA methods with 5.5 in mIoU, 5.5 in AP and 5.5 in PQ on average. But table 1 does not support this claim.",
      "category": "table-text",
      "description": "The difference in mIoU, AP and PQ claimed in the text can't be found in the table",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Table 1 reports the image segmentation results\non three widely-used common datasets: Cityscapes, Mapillary Vistas, and ADE20K. HoughPL\ndemonstrates significant performance gains over the baseline across these datasets. Specifically,\nHoughPL outperforms the state-of-the-art methods by 5.5 in mIoU for semantic segmentation, 5.5 in\nAP for instance segmentation, and 5.5 in PQ for panoptic segmentation on average",
          "correct": "nxZbKWhUeZ_8_a7fb73c6",
          "incorrect": [
            "nxZbKWhUeZ_8_table_table3",
            "nxZbKWhUeZ_8_table_table5",
            "nxZbKWhUeZ_8_table_table4"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"gain\",\"claim\":{\"source\":\"text\",\"statement\":\"5.5 gain\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"smaller gain\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"gain\",\"claim\":{\"source\":\"text\",\"statement\":\"average gain\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"exact gain\"}}",
            "{\"letter\":\"D\",\"attribute\":\"datasets\",\"claim\":{\"source\":\"text\",\"statement\":\"three datasets\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"missing dataset\"}}",
            "{\"letter\":\"A\",\"attribute\":\"comparison\",\"claim\":{\"source\":\"text\",\"statement\":\"outperforming state-of-the-art\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"compared to baseline\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance gain\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"discrepancy\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"performance gain\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align variability\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"dataset results\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"include Mapillary Vistas results\",\"reason\":\"Mapillary Vistas results missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"comparison\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"include SSPrompt\",\"reason\":\"missing SOTA baseline\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "nM2kuesKpC": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "nM2kuesKpC_9_50486f6c",
          "bbox": {
            "x": 0.17996108949416342,
            "y": 0.5263157894736843,
            "width": 0.6712062256809338,
            "height": 0.20601503759398496
          }
        }
      ],
      "review_text": "Figure 4: The colors used in this figure do not match the rest of the figures, making it difficult to compare results across visualizations.",
      "category": "figure-only",
      "description": "There are no blue and orange lines in the plot, but they can be found in the legend",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"lines\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be distinguishable\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"overlap\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"symbol\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"does not match\"}}",
            "{\"letter\":\"B\",\"attribute\":\"scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"D\",\"attribute\":\"symbol\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"does not match\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"lines\",\"target\":\"plot\",\"other_involved\":null,\"action\":\"reposition\",\"edit_statement\":\"separate lines\",\"reason\":\"overlap\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"symbol\",\"target\":\"legend\",\"other_involved\":\"plot\",\"action\":\"modify\",\"edit_statement\":\"match symbol\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"x-axis labels\",\"target\":\"figure_4a\",\"other_involved\":\"figure_4b\",\"action\":\"modify\",\"edit_statement\":\"scale consistently\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"symbol\",\"target\":\"legend\",\"other_involved\":\"plot\",\"action\":\"modify\",\"edit_statement\":\"match symbol\",\"reason\":\"different\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "mnwlhvmKMN": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "mnwlhvmKMN_3_ef749439",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.11278195488721804,
            "width": 0.6653696498054475,
            "height": 0.17142857142857143
          }
        }
      ],
      "review_text": "Figure 2: 'guided depth diffusion' and 'guided normal diffusion' are flipped, contradicting the labels provided.",
      "category": "figure-only",
      "description": "'guided depth diffusion' and 'guided normal diffusion' labels should be flipped",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"diffusion model labels\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not be swapped\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"are swapped\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"video optical flow image\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should represent guidance\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"does not represent\"}}",
            "{\"letter\":\"A\",\"attribute\":\"steps and result images\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"do not match\"}}",
            "{\"letter\":\"B\",\"attribute\":\"optical flow map\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"does not match\"}}"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"labels\",\"target\":\"Video Depth, Video Normal\",\"other_involved\":null,\"action\":\"replace\",\"edit_statement\":\"swap diffusion model\",\"reason\":\"swapped\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"image\",\"target\":\"Video Optical Flow\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"represent guidance type\",\"reason\":\"inadequate\"}",
            "{\"letter\":\"A\",\"attribute\":\"steps\",\"target\":\"figure_2\",\"other_involved\":\"Video Normal, Video Depth\",\"action\":\"modify\",\"edit_statement\":\"align result images\",\"reason\":\"not match\"}",
            "{\"letter\":\"B\",\"attribute\":\"optical flow map\",\"target\":\"figure_2\",\"other_involved\":\"Original Video Dataset\",\"action\":\"modify\",\"edit_statement\":\"align original video\",\"reason\":\"not match\"}"
          ],
          "letters": ["D", "C", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "miIE56qM10": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "miIE56qM10_5_b1c89f61",
          "bbox": {
            "x": 0.3570038910505836,
            "y": 0.13082706766917293,
            "width": 0.4785992217898833,
            "height": 0.05112781954887218
          }
        },
        {
          "type": "text",
          "page": 5,
          "content": "In under-confidence, we assume \nP\n(\ny\n\u2217\n\u2223\nx\n;\n\u03b8\n)\n=\np\n\u2217\nP(y \n\u2217\n \u2223x;\u03b8)=p \n\u2217\n  and \nP\n(\ny\ni\n\u2223\nx\n;\n\u03b8\n)\n=\np\ni\nP(y \ni\n\u200b\t\n \u2223x;\u03b8)=p \ni\n\u200b\t\n  for \ny\ni\n\u2260\ny\n\u2217\ny \ni\n\u200b\t\n \n\ue020\n=y \n\u2217\n , where \np\n\u2217\n+\n\u2211\ny\ni\n\u2260\ny\n\u2217\np\ni\n=\n1\np \n\u2217\n +\u2211 \ny \ni\n\u200b\t\n \n\ue020\n=y \n\u2217\n \n\u200b\t\n p \ni\n\u200b\t\n =1. By Jensen\u2019s inequality, we have:",
          "line": -1
        }
      ],
      "review_text": "Equation 14: Jensen's inequality applies \u2265, but the equation uses a stricter >. The reason for this stricter condition should be explained.",
      "category": "equation-text",
      "description": "Jensen's inequality should have a >= instead of >",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the equation consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the equation inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "In under-confidence, we assume \nP\n(\ny\n\u2217\n\u2223\nx\n;\n\u03b8\n)\n=\np\n\u2217\nP(y \n\u2217\n \u2223x;\u03b8)=p \n\u2217\n  and \nP\n(\ny\ni\n\u2223\nx\n;\n\u03b8\n)\n=\np\ni\nP(y \ni\n\u200b\t\n \u2223x;\u03b8)=p \ni\n\u200b\t\n  for \ny\ni\n\u2260\ny\n\u2217\ny \ni\n\u200b\t\n \n\ue020\n=y \n\u2217\n , where \np\n\u2217\n+\n\u2211\ny\ni\n\u2260\ny\n\u2217\np\ni\n=\n1\np \n\u2217\n +\u2211 \ny \ni\n\u200b\t\n \n\ue020\n=y \n\u2217\n \n\u200b\t\n p \ni\n\u200b\t\n =1. By Jensen\u2019s inequality, we have:",
          "correct": "miIE56qM10_5_b1c89f61",
          "incorrect": [
            "miIE56qM10_4_interline-equation_equation9",
            "miIE56qM10_4_interline-equation_equation32.5",
            "miIE56qM10_4_interline-equation_equation2.5"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"inequality sign\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be non-strict\"},\"evidence\":{\"source\":\"equation_14\",\"statement\":\"is strict\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"summation limits\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should include all y_i\"},\"evidence\":{\"source\":\"equation_14\",\"statement\":\"excludes some y_i\"}}",
            "{\"letter\":\"C\",\"attribute\":\"logarithm base\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be log_10\"},\"evidence\":{\"source\":\"equation_14\",\"statement\":\"uses ln\"}}",
            "{\"letter\":\"B\",\"attribute\":\"negative sign\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not be there\"},\"evidence\":{\"source\":\"equation_14\",\"statement\":\"has negative sign\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"inequality sign\",\"target\":\"equation_14\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"change sign\",\"reason\":\"non-strict\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"summation limits\",\"target\":\"equation_14\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"adjust limits\",\"reason\":\"incorrect\"}",
            "{\"letter\":\"C\",\"attribute\":\"terms\",\"target\":\"equation_14\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"change base\",\"reason\":\"consistency\"}",
            "{\"letter\":\"B\",\"attribute\":\"negative sign\",\"target\":\"equation_14\",\"other_involved\":null,\"action\":\"remove\",\"edit_statement\":\"negative sign\",\"reason\":\"incorrect\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["(14)"]
    }
  ],
  "mb2rHLcKN5": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "mb2rHLcKN5_2_61f00f27",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.10075187969924813,
            "width": 0.6673151750972763,
            "height": 0.3969924812030075
          }
        },
        {
          "type": "image",
          "page": 14,
          "image_id": "mb2rHLcKN5_14_a7fa47b0",
          "bbox": {
            "x": 0.1877431906614786,
            "y": 0.30453633473331765,
            "width": 0.6284046692607004,
            "height": 0.48571428571428577
          }
        }
      ],
      "review_text": "Section 3.2 and Figure 1: The generation of subgoal-based proofs is conditioned on manually provided formal proofs, but Figure 6 seems to indicate that no formal proofs are included in the prompt. This discrepancy needs further clarification.",
      "category": "figure-figure",
      "description": "Figure 1 shows formal proofs as input to generator, figure 6 contradicts this",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first figure consistent with the content of the second figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first figure inconsistent with the content of the second figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "mb2rHLcKN5_2_61f00f27",
          "correct": "mb2rHLcKN5_14_a7fa47b0",
          "incorrect": [
            "mb2rHLcKN5_14_image_figure7",
            "mb2rHLcKN5_14_image_figure8",
            "mb2rHLcKN5_14_image_figure9"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"input to generators\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_1, figure_6\",\"statement\":\"inconsistent\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"input for proof generation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_1, figure_6\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"B\",\"attribute\":\"proof generation\",\"claim\":{\"source\":\"figure_1, figure_6\",\"statement\":\"exclusively from informal statements\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"not exclusively from informal statements\"}}",
            "{\"letter\":\"A\",\"attribute\":\"processes\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_1, figure_6\",\"statement\":\"different processes\"}}"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"input\",\"target\":\"figure_1\",\"other_involved\":\"figure_6\",\"action\":\"modify\",\"edit_statement\":\"update input\",\"reason\":\"contradicts\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"input\",\"target\":\"figure_1\",\"other_involved\":\"figure_6\",\"action\":\"modify\",\"edit_statement\":\"update input sources\",\"reason\":\"includes formal\"}",
            "{\"letter\":\"B\",\"attribute\":\"format\",\"target\":\"figure_1\",\"other_involved\":\"figure_6\",\"action\":\"modify\",\"edit_statement\":\"update format\",\"reason\":\"differ\"}",
            "{\"letter\":\"A\",\"attribute\":\"process\",\"target\":\"figure_6\",\"other_involved\":\"figure_1\",\"action\":\"add\",\"edit_statement\":\"add process\",\"reason\":\"not shown\"}"
          ],
          "letters": ["C", "D", "B", "A"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 1", "Figure 6"]
    }
  ],
  "mZvzvwIu8f": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "mZvzvwIu8f_7_cd2d732b",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.0962406015037594,
            "width": 0.6595330739299611,
            "height": 0.2721804511278195
          }
        }
      ],
      "review_text": "Table 1: The best result of Inc 5 experiment on CIFAR100 B50 dataset is obtained by DSGD (63.58) instead of CREATE (63.53).",
      "category": "table-only",
      "description": "The best result in Last Inc 5 should be DSGD and not CREATE",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"best performance\",\"claim\":{\"source\":\"table\",\"statement\":\"CREATE is best\"},\"evidence\":{\"source\":\"table\",\"statement\":\"DSGD is higher\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"best performance\",\"claim\":{\"source\":\"table\",\"statement\":\"DSGD is best\"},\"evidence\":{\"source\":\"table\",\"statement\":\"CREATE is higher\"}}",
            "{\"letter\":\"C\",\"attribute\":\"best performance\",\"claim\":{\"source\":\"table\",\"statement\":\"CREATE is best\"},\"evidence\":{\"source\":\"table\",\"statement\":\"DSGD is higher\"}}",
            "{\"letter\":\"B\",\"attribute\":\"Gain (\u0394)\",\"claim\":{\"source\":\"expectation\",\"statement\":\"corresponds to difference\"},\"evidence\":{\"source\":\"table\",\"statement\":\"does not correspond\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"bolding\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"unbold CREATE\",\"reason\":\"incorrect\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"highlight\",\"target\":\"table_1\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"bold DSGD\",\"reason\":\"incorrect\"}",
            "{\"letter\":\"C\",\"attribute\":\"bolding\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"unbold CREATE\",\"reason\":\"incorrect\"}",
            "{\"letter\":\"B\",\"attribute\":\"gain calculation\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"update gain\",\"reason\":\"incorrect\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "mXh8LbXXpx": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "mXh8LbXXpx_5_3eb4df26",
          "bbox": {
            "x": 0.16828793774319065,
            "y": 0.09172932330827067,
            "width": 0.6634241245136187,
            "height": 0.2646616541353384
          }
        }
      ],
      "review_text": "Table 1 caption: The caption states that the first block indicates visual prompts and the second block text prompts, but the table structure is reversed.",
      "category": "table-caption",
      "description": "The caption states the first block shows vision-based methods and the second block text-based methods, but the table blocks are exactly the other way round",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the table consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the table inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"model blocks\",\"claim\":{\"source\":\"caption\",\"statement\":\"first block visual-prompted\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"first block text-prompted\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"model blocks\",\"claim\":{\"source\":\"caption\",\"statement\":\"two blocks\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"one block\"}}",
            "{\"letter\":\"A\",\"attribute\":\"Average column\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent with categories\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"inconsistent with categories\"}}",
            "{\"letter\":\"C\",\"attribute\":\"model categories\",\"claim\":{\"source\":\"expectation\",\"statement\":\"clearly labeled\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not labeled\"}}"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"prompted models\",\"target\":\"table_1\",\"other_involved\":\"caption\",\"action\":\"replace\",\"edit_statement\":\"model types\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"model blocks\",\"target\":\"table_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"match blocks\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"Average column\",\"target\":\"table_1\",\"other_involved\":\"numerical values\",\"action\":\"modify\",\"edit_statement\":\"match values\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"model categories\",\"target\":\"table_1\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"labels\",\"reason\":\"unclear\"}"
          ],
          "letters": ["B", "D", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "mXh8LbXXpx_5_802cbb3c",
          "bbox": {
            "x": 0.1663424124513619,
            "y": 0.09022556390977444,
            "width": 0.6750972762645914,
            "height": 0.2646616541353384
          }
        },
        {
          "type": "image",
          "page": 6,
          "image_id": "mXh8LbXXpx_6_036f5f6e",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.09551377834234023,
            "width": 0.6673151750972763,
            "height": 0.1548872180451128
          }
        }
      ],
      "review_text": "Table 1 vs Table 2: SoftMatcher+ achieves a score of 41.6 in Table 1 but 41.8 in Table 2.",
      "category": "table-table",
      "description": "SoftMatcher+ performance does not match between table 1 and table 2",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "mXh8LbXXpx_5_802cbb3c",
          "correct": "mXh8LbXXpx_6_036f5f6e",
          "incorrect": [
            "mXh8LbXXpx_4_table_table1",
            "mXh8LbXXpx_8_table_table4",
            "mXh8LbXXpx_6_image_figure1"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"SoftMatcher+ performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be equal\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"values differ\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"Supervised baseline\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be equal\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"values differ\"}}",
            "{\"letter\":\"A\",\"attribute\":\"text-prompted models count\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be equal\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"counts differ\"}}",
            "{\"letter\":\"D\",\"attribute\":\"LISA model performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"Average performance differs\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"performance values\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"update values\",\"reason\":\"values differ\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"Supervised baseline\",\"target\":\"table_1\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"update values\",\"reason\":\"values differ\"}",
            "{\"letter\":\"A\",\"attribute\":\"text-prompted models\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"add\",\"edit_statement\":\"add models\",\"reason\":\"missing models\"}",
            "{\"letter\":\"D\",\"attribute\":\"Average performance\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align performance\",\"reason\":\"different performance\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1", "Table 2"]
    }
  ],
  "krUajZ1gHg": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "krUajZ1gHg_6_e2c40cf4",
          "bbox": {
            "x": 0.17217898832684825,
            "y": 0.09172932330827067,
            "width": 0.6848249027237354,
            "height": 0.2330827067669173
          }
        },
        {
          "type": "text",
          "page": 5,
          "content": "Compared with the existing Wildfish++ dataset with both taxonomy and visual descriptions from the domain experts, MarineMaid is 10 times larger and contains a wide range of marine creatures while wildfish++ only focuses on fish.",
          "line": 256
        }
      ],
      "review_text": "Table 1: The review mentions that MarineMaid dataset provides better captions and taxonomy, but has less instances than some other datasets, which seems to contradict the statement that it provides 'better' captions and taxonomy.",
      "category": "figure-text",
      "description": "The claim that MarineMaid is 10 times larger than Wildfish++ seems to not hold true based on the table",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Compared with the existing Wildfish++ dataset with both taxonomy and visual descriptions from the domain experts, MarineMaid is 10 times larger and contains a wide range of marine creatures while wildfish++ only focuses on fish.",
          "correct": "krUajZ1gHg_6_e2c40cf4",
          "incorrect": [
            "krUajZ1gHg_6_table_table2",
            "krUajZ1gHg_6_table_table3",
            "krUajZ1gHg_8_table_table4"
          ],
          "letters": ["B", "D", "C", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"size comparison\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match table data\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"does not confirm size comparison\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"annotations\",\"claim\":{\"source\":\"text\",\"statement\":\"has visual descriptions from domain experts\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"has little expert annotations\"}}",
            "{\"letter\":\"B\",\"attribute\":\"categories\",\"claim\":{\"source\":\"text\",\"statement\":\"focuses on fish\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"has more categories\"}}",
            "{\"letter\":\"C\",\"attribute\":\"taxonomy\",\"claim\":{\"source\":\"text\",\"statement\":\"has taxonomy\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"does not have taxonomy\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"dataset size\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"confirm dataset size\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"expert annotations\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"match annotation quantity\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"B\",\"attribute\":\"categories\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"match listed categories\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"taxonomy\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"reflect taxonomy presence\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "km2nHt2YoD": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "km2nHt2YoD_9_502d90b9",
          "bbox": {
            "x": 0.4309338521400778,
            "y": 0.5172932330827068,
            "width": 0.40077821011673154,
            "height": 0.12030075187969926
          }
        }
      ],
      "review_text": "Table 3: The discussion on generality states that using corresponding training data does not always yield the best performance on the corresponding testing data, which contradicts the expectation that training on specific data should improve performance on that same data.",
      "category": "table-only",
      "description": "Caption mentions 'when the training data is different from the testing data', but table also shows results for the same training and testing data",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"data condition\",\"claim\":{\"source\":\"caption\",\"statement\":\"different data\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"same data\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"node count\",\"claim\":{\"source\":\"caption\",\"statement\":\"100 nodes\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"no node count\"}}",
            "{\"letter\":\"B\",\"attribute\":\"objective value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"different values\"}}",
            "{\"letter\":\"C\",\"attribute\":\"average objective value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be one value\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"multiple values\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"training/testing data\",\"target\":\"table_3\",\"other_involved\":\"caption_table_3\",\"action\":\"modify\",\"edit_statement\":\"align description\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"node count\",\"target\":\"caption_table_3\",\"other_involved\":\"table_3\",\"action\":\"remove\",\"edit_statement\":\"remove mention\",\"reason\":\"not present\"}",
            "{\"letter\":\"B\",\"attribute\":\"objective value\",\"target\":\"table_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"asymmetry\"}",
            "{\"letter\":\"C\",\"attribute\":\"average vs individual values\",\"target\":\"table_3\",\"other_involved\":\"caption_table_3\",\"action\":\"modify\",\"edit_statement\":\"align description\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3"]
    }
  ],
  "kIOAMYeOcv": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "kIOAMYeOcv_2_792843cd",
          "bbox": {
            "x": 0.1663424124513619,
            "y": 0.09774436090225565,
            "width": 0.6712062256809338,
            "height": 0.32481203007518794
          }
        },
        {
          "type": "text",
          "page": 10,
          "content": "Experiments indicate that our method achieves new state-of-the-art performance on five datasets, surpassing recently proposed VLTVG, LADS, LUNA, QRNet, VG-LAW and MMCA.",
          "line": 537
        }
      ],
      "review_text": "Figure 1: The analysis lacks results for VG-LAW, TransVG++, LUNA, LG-FPN, PVD, which are mentioned and compared in the paper. The figure claims that previous works lack discriminative visual features, but without results for these methods, the claim is not fully supported.",
      "category": "figure-text",
      "description": "Figure does not show SOTA models mentioned in the text",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Experiments indicate that our method achieves new state-of-the-art performance on five datasets, surpassing recently proposed VLTVG, LADS, LUNA, QRNet, VG-LAW and MMCA.",
          "correct": "kIOAMYeOcv_2_792843cd",
          "incorrect": [
            "kIOAMYeOcv_3_image_figure2",
            "kIOAMYeOcv_7_image_figure3",
            "kIOAMYeOcv_14_image_figure4"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"comparative analysis\",\"claim\":{\"source\":\"expectation\",\"statement\":\"includes all mentioned models\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"missing models\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"models\",\"claim\":{\"source\":\"expectation\",\"statement\":\"only SOTA models\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"includes TransVG\"}}",
            "{\"letter\":\"C\",\"attribute\":\"datasets\",\"claim\":{\"source\":\"text\",\"statement\":\"five datasets\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"four columns\"}}",
            "{\"letter\":\"A\",\"attribute\":\"bounding boxes\",\"claim\":{\"source\":\"text\",\"statement\":\"state-of-the-art performance\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"outside ground-truth\"}}"
          ],
          "letters": ["B", "D", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"models\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add models\",\"reason\":\"omission\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"TransVG\",\"target\":\"text\",\"other_involved\":\"figure_1\",\"action\":\"add\",\"edit_statement\":\"add model\",\"reason\":\"omission\"}",
            "{\"letter\":\"C\",\"attribute\":\"datasets\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add dataset\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"A\",\"attribute\":\"bounding boxes\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align position\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["B", "D", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "kA5egaJjya": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 1,
          "image_id": "kA5egaJjya_1_a0c17c2f",
          "bbox": {
            "x": 0.17412451361867703,
            "y": 0.4406015037593985,
            "width": 0.6634241245136187,
            "height": 0.22706766917293233
          }
        }
      ],
      "review_text": "Figure 1(b): The windows indicated in Figure 1(a) are missing.",
      "category": "figure-only",
      "description": "The windows indicated in the 2D plan are not included in the 3D model",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"windows\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"absent\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"front door position\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"does not match\"}}",
            "{\"letter\":\"D\",\"attribute\":\"interior furniture\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"A\",\"attribute\":\"room arrangement\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"does not match\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"windows\",\"target\":\"figure_1b\",\"other_involved\":\"figure_1a\",\"action\":\"add\",\"edit_statement\":\"include windows\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"front door position\",\"target\":\"figure_1b\",\"other_involved\":\"figure_1a\",\"action\":\"modify\",\"edit_statement\":\"align door position\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"furniture\",\"target\":\"figure_1a\",\"other_involved\":\"figure_1b\",\"action\":\"add\",\"edit_statement\":\"include furniture\",\"reason\":\"missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"room arrangement\",\"target\":\"figure_1b\",\"other_involved\":\"figure_1a\",\"action\":\"modify\",\"edit_statement\":\"match room arrangement\",\"reason\":\"different\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "kA5egaJjya_3_1a28ec99",
          "bbox": {
            "x": 0.26556420233463035,
            "y": 0.5954887218045113,
            "width": 0.5778210116731518,
            "height": 0.04360902255639098
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "The output guides the spatial distribution within the floor plan as shown in (1), where Nrooms is the suggested number of bedrooms and restrooms that could fit in this floor area,\nBoundary is the the floor plan boundary mask, Front Door Position is the designated entry point, and\nArea is the encoded area of the floor plan boundary.",
          "line": 139
        }
      ],
      "review_text": "L142: Eq(1) should have N_rooms instead of N_counts.",
      "category": "equation-text",
      "description": "The N_counts should be N_rooms",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the equation consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the equation inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "The output guides the spatial distribution within the floor plan as shown in (1), where Nrooms is the suggested number of bedrooms and restrooms that could fit in this floor area,\nBoundary is the the floor plan boundary mask, Front Door Position is the designated entry point, and\nArea is the encoded area of the floor plan boundary.",
          "correct": "kA5egaJjya_3_1a28ec99",
          "incorrect": [
            "kA5egaJjya_4_interline-equation_equation21.5",
            "kA5egaJjya_4_interline-equation_equation18.5",
            "kA5egaJjya_4_image_figure2"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"N_counts\",\"claim\":{\"source\":\"text\",\"statement\":\"Nrooms\"},\"evidence\":{\"source\":\"equation_1\",\"statement\":\"N_counts\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"RoomCounter parameters\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be fully described\"},\"evidence\":{\"source\":\"text\",\"statement\":\"not fully described\"}}",
            "{\"letter\":\"A\",\"attribute\":\"Nrooms\",\"claim\":{\"source\":\"text\",\"statement\":\"spatial distribution\"},\"evidence\":{\"source\":\"equation_1\",\"statement\":\"numerical count\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Equation 1\",\"claim\":{\"source\":\"text\",\"statement\":\"multiple outputs\"},\"evidence\":{\"source\":\"equation_1\",\"statement\":\"single formula\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"variable name\",\"target\":\"text\",\"other_involved\":\"equation_1\",\"action\":\"modify\",\"edit_statement\":\"align variable name\",\"reason\":\"naming discrepancy\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"parameters\",\"target\":\"text\",\"other_involved\":\"RoomCounter\",\"action\":\"add\",\"edit_statement\":\"describe parameters\",\"reason\":\"missing description\"}",
            "{\"letter\":\"A\",\"attribute\":\"Nrooms\",\"target\":\"text\",\"other_involved\":\"equation_1\",\"action\":\"modify\",\"edit_statement\":\"align definition with equation\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"D\",\"attribute\":\"outputs\",\"target\":\"text\",\"other_involved\":\"equation_1\",\"action\":\"modify\",\"edit_statement\":\"align output type\",\"reason\":\"multiple outputs\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["(1)"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "kA5egaJjya_3_6a958b3e",
          "bbox": {
            "x": 0.20914396887159534,
            "y": 0.8977443609022556,
            "width": 0.6342412451361867,
            "height": 0.04360902255639098
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "The use of a shared encoder ensures that the feature extraction process is uniform and only\nneeds to be executed once, thus speeding up the prediction process as shown in (2), where Frecurrent represents compressed features from previous predictions, encapsulating essential\nspatial and structural information, and \u03b8shared are the parameters of the ResNet101 model.",
          "line": 158
        }
      ],
      "review_text": "L161: Eq(2) should have F_layout instead of F_shared.",
      "category": "equation-text",
      "description": "The equation should show F_recurrent, not F_shared",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the equation consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the equation inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "The use of a shared encoder ensures that the feature extraction process is uniform and only\nneeds to be executed once, thus speeding up the prediction process as shown in (2), where Frecurrent represents compressed features from previous predictions, encapsulating essential\nspatial and structural information, and \u03b8shared are the parameters of the ResNet101 model.",
          "correct": "kA5egaJjya_3_6a958b3e",
          "incorrect": [
            "kA5egaJjya_4_interline-equation_equation21.5",
            "kA5egaJjya_4_interline-equation_equation18.5",
            "kA5egaJjya_4_image_figure2"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"output\",\"claim\":{\"source\":\"expectation\",\"statement\":\"F_recurrent\"},\"evidence\":{\"source\":\"equation_2\",\"statement\":\"F_shared\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"model\",\"claim\":{\"source\":\"text\",\"statement\":\"ResNet101\"},\"evidence\":{\"source\":\"equation_2\",\"statement\":\"no ResNet101\"}}",
            "{\"letter\":\"A\",\"attribute\":\"encoder\",\"claim\":{\"source\":\"text\",\"statement\":\"shared encoder\"},\"evidence\":{\"source\":\"equation_2\",\"statement\":\"decoder formula\"}}",
            "{\"letter\":\"C\",\"attribute\":\"F_recurrent\",\"claim\":{\"source\":\"text\",\"statement\":\"compressed features\"},\"evidence\":{\"source\":\"equation_2\",\"statement\":\"input to LayoutEncoder\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"F_shared\",\"target\":\"equation_2\",\"other_involved\":\"explanation\",\"action\":\"replace\",\"edit_statement\":\"F_recurrent\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"parameters (\\nG_shared)\",\"target\":\"equation_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"ResNet101 mention\",\"reason\":\"missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"encoder\",\"target\":\"equation_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"match formula\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"F_recurrent\",\"target\":\"equation_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align input use\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["(2)"]
    }
  ],
  "k9KKFhwNwg": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "k9KKFhwNwg_8_f827dd06",
          "bbox": {
            "x": 0.18968871595330739,
            "y": 0.21503759398496242,
            "width": 0.6147859922178989,
            "height": 0.1804511278195489
          }
        },
        {
          "type": "text",
          "page": 8,
          "content": "The performance of EGLNN is similar to that of EGLNN-T (a module using only the teacher\nGNN), which indicates that the knowledge distillation algorithm proposed in this paper is able to\nrealize the knowledge migration from the teacher model to the student model in a more effective and\ncomprehensive way. This advantage enables the EGLNN to achieve higher accuracy at a smaller\nscale, which makes it perfect for anomaly detection tasks in the field of industrial IoT",
          "line": 405
        }
      ],
      "review_text": "Figure 2: It is obvious that EGLNN-T achieved a net better performance than EGLNN, contradicting the claim in section 1 that EGLNN-T's performance is not better than EGLNN.",
      "category": "figure-text",
      "description": "The performance of EGLNN-T is much better compared to EGLNN according to the figure, yet the text claims similar performance",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "The performance of EGLNN is similar to that of EGLNN-T (a module using only the teacher\nGNN), which indicates that the knowledge distillation algorithm proposed in this paper is able to\nrealize the knowledge migration from the teacher model to the student model in a more effective and\ncomprehensive way. This advantage enables the EGLNN to achieve higher accuracy at a smaller\nscale, which makes it perfect for anomaly detection tasks in the field of industrial IoT",
          "correct": "k9KKFhwNwg_8_f827dd06",
          "incorrect": [
            "k9KKFhwNwg_8_image_figure4",
            "k9KKFhwNwg_8_image_figure3",
            "k9KKFhwNwg_9_image_figure5"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"EGLNN-T performance\",\"claim\":{\"source\":\"text\",\"statement\":\"lower than EGLNN\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"higher than EGLNN\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"EGLNN performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"similar to EGLNN-T\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"outperforms EGLNN-T\"}}",
            "{\"letter\":\"A\",\"attribute\":\"Recall plot\",\"claim\":{\"source\":\"expectation\",\"statement\":\"different from F1-Score\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"same as F1-Score plot\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Accuracy plot\",\"claim\":{\"source\":\"expectation\",\"statement\":\"different from F1-Score\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"same as F1-Score plot\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"update EGLNN\",\"reason\":\"contradicts\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"performance\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"update EGLNN\",\"reason\":\"contradicts\"}",
            "{\"letter\":\"A\",\"attribute\":\"plot\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"explain Recall\",\"reason\":\"same\"}",
            "{\"letter\":\"D\",\"attribute\":\"plot\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"explain Accuracy\",\"reason\":\"same\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "k1mMxqalb0": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "k1mMxqalb0_4_26fd4db7",
          "bbox": {
            "x": 0.16439688715953307,
            "y": 0.09924812030075188,
            "width": 0.6809338521400778,
            "height": 0.33984962406015035
          }
        }
      ],
      "review_text": "Figure 2: The main architecture figure 2 is misleading. $z_i$ is not a weight of the model, but a hidden state. The decoder blocks are not frozen since the W_out is updated.",
      "category": "figure-only",
      "description": "The figure shows trainable weights for z_i, but z_i is a latent space, not a weight vector",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"z_i symbol\",\"claim\":{\"source\":\"expectation\",\"statement\":\"not weight vector\"},\"evidence\":{\"source\":\"figure_2a\",\"statement\":\"optimizing weights symbol\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"z_i label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"not undergoing optimization\"},\"evidence\":{\"source\":\"figure_2c\",\"statement\":\"undergoing optimization\"}}",
            "{\"letter\":\"C\",\"attribute\":\"z_i update\",\"claim\":{\"source\":\"figure_2a\",\"statement\":\"fixed output\"},\"evidence\":{\"source\":\"figure_2c\",\"statement\":\"being updated\"}}",
            "{\"letter\":\"A\",\"attribute\":\"hidden state\",\"claim\":{\"source\":\"legend\",\"statement\":\"last subject token\"},\"evidence\":{\"source\":\"figure_2a\",\"statement\":\"not last hidden state\"}}"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"z_i symbol\",\"target\":\"figure_2a\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"change z_i symbol\",\"reason\":\"wrong symbol\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"z_i label\",\"target\":\"legend\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"clarify z_i\",\"reason\":\"ambiguous\"}",
            "{\"letter\":\"C\",\"attribute\":\"z_i update\",\"target\":\"figure_2c\",\"other_involved\":\"figure_2a\",\"action\":\"modify\",\"edit_statement\":\"align z_i status\",\"reason\":\"implied fixed\"}",
            "{\"letter\":\"A\",\"attribute\":\"purple hidden state\",\"target\":\"figure_2a\",\"other_involved\":\"legend\",\"action\":\"modify\",\"edit_statement\":\"update hidden state position\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "B", "C", "A"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 2"]
    }
  ],
  "jwGPmIqE99": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 31,
          "image_id": "jwGPmIqE99_31_48cdff80",
          "bbox": {
            "x": 0.17023346303501946,
            "y": 0.1804511278195489,
            "width": 0.688715953307393,
            "height": 0.08571428571428572
          }
        },
        {
          "type": "image",
          "page": 31,
          "image_id": "jwGPmIqE99_31_da829557",
          "bbox": {
            "x": 0.1780155642023346,
            "y": 0.37370926706414476,
            "width": 0.6517509727626459,
            "height": 0.09924812030075188
          }
        }
      ],
      "review_text": "Table 8 and 9: The rows and columns are rotated, making the data difficult to understand and compare.",
      "category": "table-table",
      "description": "The tables' rows and columns are inverted, making the two tables hard to compare",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "jwGPmIqE99_31_48cdff80",
          "correct": "jwGPmIqE99_31_da829557",
          "incorrect": [
            "jwGPmIqE99_31_table_table10",
            "jwGPmIqE99_31_table_table11",
            "jwGPmIqE99_30_table_table7"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"table structure\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 7 and Table 8\",\"statement\":\"inverted\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"table focus\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be comparable\"},\"evidence\":{\"source\":\"Table 7 and Table 8\",\"statement\":\"not comparable\"}}",
            "{\"letter\":\"A\",\"attribute\":\"category labels\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be identical\"},\"evidence\":{\"source\":\"Table 7 and Table 8\",\"statement\":\"differ\"}}",
            "{\"letter\":\"B\",\"attribute\":\"data presence\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"Table 7 and Table 8\",\"statement\":\"do not match\"}}"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"roles\",\"target\":\"table_7\",\"other_involved\":\"table_8\",\"action\":\"reposition\",\"edit_statement\":\"invert rows columns\",\"reason\":\"inverted\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"focus\",\"target\":\"table_7\",\"other_involved\":\"table_8\",\"action\":\"modify\",\"edit_statement\":\"align focus\",\"reason\":\"different\"}",
            "{\"letter\":\"A\",\"attribute\":\"outcome categories\",\"target\":\"table_7\",\"other_involved\":\"table_8\",\"action\":\"modify\",\"edit_statement\":\"align categories\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"data points\",\"target\":\"table_7\",\"other_involved\":\"table_8\",\"action\":\"add\",\"edit_statement\":\"add matchup\",\"reason\":\"missing\"}"
          ],
          "letters": ["D", "C", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 7", "Table 8"]
    }
  ],
  "jvmMqD57ZR": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "jvmMqD57ZR_8_65eddef1",
          "bbox": {
            "x": 0.16685877203597804,
            "y": 0.09602821537009326,
            "width": 0.6801152737752162,
            "height": 0.29175946547884185
          }
        }
      ],
      "review_text": "Figure 3: The label should be 'DRAG' instead of 'DRGA'.",
      "category": "figure-only",
      "description": "In the top right subplot, the method should be \"DRAG\" instead of \"DRGA\"",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"DRGA is DRAG\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"DRGA is label\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"DRAG is DRGA\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"DRAG is label\"}}",
            "{\"letter\":\"A\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"DRAG++ is error\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"DRAG++ is label\"}}",
            "{\"letter\":\"D\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"DRAG is DRGA\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"DRAG is label\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"label\",\"target\":\"figure_3c\",\"other_involved\":\"DRAG\",\"action\":\"modify\",\"edit_statement\":\"change 'DRGA' to 'DRAG'\",\"reason\":\"typo\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"label\",\"target\":\"figure_3d\",\"other_involved\":\"DRGA\",\"action\":\"modify\",\"edit_statement\":\"change 'DRAG' to 'DRGA'\",\"reason\":\"typo\"}",
            "{\"letter\":\"A\",\"attribute\":\"label\",\"target\":\"figure_3c\",\"other_involved\":\"DRAG\",\"action\":\"modify\",\"edit_statement\":\"change 'DRAG++' to 'DRAG'\",\"reason\":\"typo\"}",
            "{\"letter\":\"D\",\"attribute\":\"label\",\"target\":\"figure_3c\",\"other_involved\":\"DRGA\",\"action\":\"modify\",\"edit_statement\":\"change 'DRAG' to 'DRGA'\",\"reason\":\"typo\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "jR6YMxVG9i": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "jR6YMxVG9i_7_0cf69ce5",
          "bbox": {
            "x": 0.1639769276555746,
            "y": 0.09725320047153396,
            "width": 0.6772334293948128,
            "height": 0.3452115812917595
          }
        }
      ],
      "review_text": "Figure 2: The x-axis increments of 1/2 do not match the actual step size of 1.",
      "category": "figure-only",
      "description": "The x-axis has 0.5 as step size, which does not make sense for integer values",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"x-axis tick marks\",\"claim\":{\"source\":\"expectation\",\"statement\":\"integer\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"includes non-integer\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"x-axis range\",\"claim\":{\"source\":\"expectation\",\"statement\":\"wider\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"is too limited\"}}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis range\",\"claim\":{\"source\":\"expectation\",\"statement\":\"narrower\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"is too wide\"}}",
            "{\"letter\":\"D\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"show dotted lines\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"does not show dotted lines\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"x-axis\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"change tick marks\",\"reason\":\"discrete variable\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"x-axis\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"extend range\",\"reason\":\"limited range\"}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"narrow range\",\"reason\":\"wide range\"}",
            "{\"letter\":\"D\",\"attribute\":\"legend\",\"target\":\"figure_2\",\"other_involved\":\"Top-K+AR, DP+AR\",\"action\":\"modify\",\"edit_statement\":\"correct line styles\",\"reason\":\"incorrect representation\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "jJvJqgPZCD": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "jJvJqgPZCD_8_8f76f5e5",
          "bbox": {
            "x": 0.42910661065269273,
            "y": 0.2857831857252227,
            "width": 0.40634005763688763,
            "height": 0.2182628062360802
          }
        }
      ],
      "review_text": "Fig 3a: The caption states 42% accuracy, but the heatmap shows 69%.",
      "category": "figure-caption",
      "description": "The caption says 42%, but the heatmap does not contain this number",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"percentage\",\"claim\":{\"source\":\"text\",\"statement\":\"42%\"},\"evidence\":{\"source\":\"heatmap\",\"statement\":\"contradictory value\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"percentage\",\"claim\":{\"source\":\"caption\",\"statement\":\"72%\"},\"evidence\":{\"source\":\"heatmap\",\"statement\":\"inconsistent percentage\"}}",
            "{\"letter\":\"B\",\"attribute\":\"total percentage\",\"claim\":{\"source\":\"expectation\",\"statement\":\"sums to 100%\"},\"evidence\":{\"source\":\"heatmap\",\"statement\":\"does not sum to 100%\"}}",
            "{\"letter\":\"D\",\"attribute\":\"percentage\",\"claim\":{\"source\":\"caption\",\"statement\":\"42%\"},\"evidence\":{\"source\":\"heatmap\",\"statement\":\"0.18\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"percentage\",\"target\":\"figure_1a\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"percentage value\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"percentage\",\"target\":\"figure_1b\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"percentage value\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"percentages\",\"target\":\"figure_1a\",\"other_involved\":\"heatmap\",\"action\":\"modify\",\"edit_statement\":\"sum to 100%\",\"reason\":\"incorrect sum\"}",
            "{\"letter\":\"D\",\"attribute\":\"percentage\",\"target\":\"figure_1a\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"display 42%\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "iwVkB9zaVb": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "iwVkB9zaVb_7_5913e6fe",
          "bbox": {
            "x": 0.17262246079678495,
            "y": 0.6773199748355722,
            "width": 0.6685878962536023,
            "height": 0.2383073496659243
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": " Notably, R-CoT-8B significantly outperforms previous state-of-the-art open-source mathematical models by 16.6% on MathVista and 9.2% on GeoQA, while also surpassing the closed-source model GPT-4o by an average of 13% across both datasets",
          "line": 24
        }
      ],
      "review_text": "Table 1: The statement in the abstract claims a significant improvement of 16.6% and 9.2% over previous state-of-the-art models on MathVista and GeoQA respectively, but the table shows marginal improvements under comparable settings.",
      "category": "figure-text",
      "description": "The table does not confirm the performance advantage claimed in the abstract",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": " Notably, R-CoT-8B significantly outperforms previous state-of-the-art open-source mathematical models by 16.6% on MathVista and 9.2% on GeoQA, while also surpassing the closed-source model GPT-4o by an average of 13% across both datasets",
          "correct": "iwVkB9zaVb_7_5913e6fe",
          "incorrect": [
            "iwVkB9zaVb_8_table_table2",
            "iwVkB9zaVb_8_table_table3",
            "iwVkB9zaVb_9_table_table4"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance gain\",\"claim\":{\"source\":\"text\",\"statement\":\"large improvements\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"different gains\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"parameter count\",\"claim\":{\"source\":\"expectation\",\"statement\":\"no additional parameters\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"varies\"}}",
            "{\"letter\":\"B\",\"attribute\":\"performance gain\",\"claim\":{\"source\":\"expectation\",\"statement\":\"significant gain\"},\"evidence\":{\"source\":\"text\",\"statement\":\"0.8% gain\"}}",
            "{\"letter\":\"A\",\"attribute\":\"model comparison\",\"claim\":{\"source\":\"text\",\"statement\":\"single model\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"multiple models\"}}"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance gains\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align with table\",\"reason\":\"different numbers\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"parameter count\",\"target\":\"caption\",\"other_involved\":\"R-CoT\",\"action\":\"modify\",\"edit_statement\":\"clarify\",\"reason\":\"varied\"}",
            "{\"letter\":\"B\",\"attribute\":\"performance gains\",\"target\":\"text\",\"other_involved\":\"R-CoT-2B\",\"action\":\"modify\",\"edit_statement\":\"adjust claim\",\"reason\":\"inaccurate\"}",
            "{\"letter\":\"A\",\"attribute\":\"R-CoT-8B\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"describe R-CoT-8B\",\"reason\":\"multiple R-CoT models in table\"}"
          ],
          "letters": ["C", "D", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "ivXe7J6U0k": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "ivXe7J6U0k_4_e5ded7ba",
          "bbox": {
            "x": 0.1697406164163815,
            "y": 0.3765404240325724,
            "width": 0.6628242074927955,
            "height": 0.23385300668151449
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "The evaluation results are shown in Figure 1a. Consistency using neighbors generated with weak\naugmentation significantly reduces calibration error compared to the baseline. As we increase the\nperturbation strength with moderate augmentation, as shown by the x-axis values, the calibration\nerror continues to decrease with minimal impact on accuracy, outperforming the commonly used\ncalibration method, Temperature Scaling, up to a certain perturbation threshold",
          "line": 169
        }
      ],
      "review_text": "Figure 1(a): The legend is missing 'Moderate Augmentation', which contradicts the information in the text.",
      "category": "figure-text",
      "description": "Figure 1a is missing an indication of what is weak augmentation and what is moderate augmentation",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "The evaluation results are shown in Figure 1a. Consistency using neighbors generated with weak\naugmentation significantly reduces calibration error compared to the baseline. As we increase the\nperturbation strength with moderate augmentation, as shown by the x-axis values, the calibration\nerror continues to decrease with minimal impact on accuracy, outperforming the commonly used\ncalibration method, Temperature Scaling, up to a certain perturbation threshold",
          "correct": "ivXe7J6U0k_4_e5ded7ba",
          "incorrect": [
            "ivXe7J6U0k_5_image_figure3",
            "ivXe7J6U0k_6_image_figure4",
            "ivXe7J6U0k_8_image_figure5"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"augmentation ranges\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be labeled\"},\"evidence\":{\"source\":\"figure_1a\",\"statement\":\"not labeled\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"performance comparison\",\"claim\":{\"source\":\"text\",\"statement\":\"augmentation outperforms scaling\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"scaling outperforms augmentation\"}}",
            "{\"letter\":\"D\",\"attribute\":\"augmentation strength scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be defined\"},\"evidence\":{\"source\":\"figure_1a\",\"statement\":\"not defined\"}}",
            "{\"letter\":\"B\",\"attribute\":\"accuracy trend\",\"claim\":{\"source\":\"text\",\"statement\":\"minimal impact on accuracy\"},\"evidence\":{\"source\":\"figure_1a\",\"statement\":\"accuracy decreases\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"augmentation range labels\",\"target\":\"figure_1a\",\"other_involved\":\"caption, text\",\"action\":\"add\",\"edit_statement\":\"delineate ranges\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"performance\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align claims\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"augmentation strength values\",\"target\":\"figure_1a\",\"other_involved\":null,\"action\":\"clarify\",\"edit_statement\":\"define values\",\"reason\":\"undefined\"}",
            "{\"letter\":\"B\",\"attribute\":\"accuracy trend\",\"target\":\"figure_1a\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align claims\",\"reason\":\"inconsistency\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "iiK1vNRo6I": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "iiK1vNRo6I_5_708d4713",
          "bbox": {
            "x": 0.5270893195864104,
            "y": 0.10337792207509744,
            "width": 0.2708933717579251,
            "height": 0.17594654788418707
          }
        },
        {
          "type": "text",
          "page": 6,
          "content": "Train \u03bc-NN: (i) Initiate \u03bc-predictor NN with random W1, B0, fix W0 = [\u03bc\u2217\nB1 , . . . , \u03bc\u2217\nBn ]T\nand set with random weights, (ii) Train the \u03bc-predictor NN with 1000 datapoints populated\nusing the procedure described above, (iii) Define solver NN by setting first part to \u03bc-NN\nand second part to J\u22121.",
          "line": 303
        }
      ],
      "review_text": "Figure 2: The first layer weights W_0 are initialized as gradients of the dual variables \u03bc, but in the following text of section 3.4 they are given as W_0 = [\u03bc*]",
      "category": "figure-text",
      "description": "In figure 2, the W_0 are initialized using \\mu, whereas in the text W_0 is initialized using \\mu^*",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Train \u03bc-NN: (i) Initiate \u03bc-predictor NN with random W1, B0, fix W0 = [\u03bc\u2217\nB1 , . . . , \u03bc\u2217\nBn ]T\nand set with random weights, (ii) Train the \u03bc-predictor NN with 1000 datapoints populated\nusing the procedure described above, (iii) Define solver NN by setting first part to \u03bc-NN\nand second part to J\u22121.",
          "correct": "iiK1vNRo6I_5_708d4713",
          "incorrect": [
            "iiK1vNRo6I_4_image_figure1",
            "iiK1vNRo6I_6_image_figure4",
            "iiK1vNRo6I_6_image_figure3"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"initialization\",\"claim\":{\"source\":\"text\",\"statement\":\"W0 is fixed using \u03bc* values\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"W^0 initialized using \u2207\u03bc\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"trainable parameters\",\"claim\":{\"source\":\"text\",\"statement\":\"W1 and B0 are random variables\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"W1* and B0 are trainable parameters\"}}",
            "{\"letter\":\"D\",\"attribute\":\"training procedure\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be in figure\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"no training data\"}}",
            "{\"letter\":\"C\",\"attribute\":\"network output\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be \u03bc\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"output is \u03bc*\"}}"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"initialization\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align variable initialization\",\"reason\":\"contradictory\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"parameters\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align description\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"D\",\"attribute\":\"training details\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add training datapoints\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"output\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align variable name\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["B", "A", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "i2ue8J6aqI": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "i2ue8J6aqI_8_126b862a",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.08911876459231322,
            "width": 0.6666666666666666,
            "height": 0.14942528735632185
          }
        }
      ],
      "review_text": "Fig3: The ADNI dataset has three classes (HC, MCI, AD), but the figure only shows two clusters, which contradicts the expected number of classes.",
      "category": "figure-only",
      "description": "The ADNI dataset has 3 classes, but the scatterplots only show 2 classes",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"classes\",\"claim\":{\"source\":\"Figure 3\",\"statement\":\"2 classes\"},\"evidence\":{\"source\":\"text\",\"statement\":\"3 classes\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"classes\",\"claim\":{\"source\":\"Figure 3\",\"statement\":\"2 classes\"},\"evidence\":{\"source\":\"text\",\"statement\":\"4 classes\"}}",
            "{\"letter\":\"D\",\"attribute\":\"scatterplots\",\"claim\":{\"source\":\"expectation\",\"statement\":\"different\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"same\"}}",
            "{\"letter\":\"A\",\"attribute\":\"classes\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be separable\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"not separable\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"classes\",\"target\":\"figure_3\",\"other_involved\":\"ADNI dataset\",\"action\":\"add\",\"edit_statement\":\"third class\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"classes\",\"target\":\"figure_3\",\"other_involved\":\"ADNI dataset\",\"action\":\"modify\",\"edit_statement\":\"represent 4 classes\",\"reason\":\"two depicted\"}",
            "{\"letter\":\"D\",\"attribute\":\"scatterplots\",\"target\":\"figure_3d\",\"other_involved\":\"figure_3e\",\"action\":\"modify\",\"edit_statement\":\"distinguish representation\",\"reason\":\"equivalent\"}",
            "{\"letter\":\"A\",\"attribute\":\"methods\",\"target\":\"figure_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"show separation ability\",\"reason\":\"not shown\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "hVpAjJPfgZ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "hVpAjJPfgZ_10_f20e1b90",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.0983141668911638,
            "width": 0.6726190476190476,
            "height": 0.2206896551724138
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "The PIH model achieved state-of-the-art results, proving the effectiveness of using\nlonger lookback windows.",
          "line": 119
        }
      ],
      "review_text": "Figure 6: The left panel legend incorrectly states 'PHI' instead of 'PIH', which contradicts the text description of the figure.",
      "category": "figure-text",
      "description": "The text calls the method \"PIH\", whereas in the figure, it is called \"PHI\"",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "The PIH model achieved state-of-the-art results, proving the effectiveness of using\nlonger lookback windows.",
          "correct": "hVpAjJPfgZ_10_f20e1b90",
          "incorrect": [
            "hVpAjJPfgZ_8_image_figure4",
            "hVpAjJPfgZ_8_image_figure5",
            "hVpAjJPfgZ_7_image_figure3"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"abbreviation\",\"claim\":{\"source\":\"text\",\"statement\":\"PIH\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"PHI\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"state-of-the-art\",\"claim\":{\"source\":\"expectation\",\"statement\":\"state-of-the-art\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"lower performance\"}}",
            "{\"letter\":\"C\",\"attribute\":\"metric\",\"claim\":{\"source\":\"caption\",\"statement\":\"average MSE\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"different metric\"}}",
            "{\"letter\":\"B\",\"attribute\":\"state-of-the-art\",\"claim\":{\"source\":\"text\",\"statement\":\"state-of-the-art\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"no baseline\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"method name\",\"target\":\"figure_6_left_chart_legend\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align name\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"performance\",\"target\":\"figure_6\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align performance\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"metric\",\"target\":\"figure_6\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align metric\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"baseline\",\"target\":\"figure_6\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add baseline\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "h5UdvNFHee": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "h5UdvNFHee_8_ea2151ba",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.08107278758081897,
            "width": 0.6696428571428571,
            "height": 0.5011494252873564
          }
        }
      ],
      "review_text": "Table 6: The two images seem to be misplaced.",
      "category": "figure-only",
      "description": "The images are in the wrong column and should be swapped",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"image order\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match text\"},\"evidence\":{\"source\":\"table 6\",\"statement\":\"images swapped\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"image quality\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be clear\"},\"evidence\":{\"source\":\"table 6\",\"statement\":\"image is blurry\"}}",
            "{\"letter\":\"D\",\"attribute\":\"explanation accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match image\"},\"evidence\":{\"source\":\"table 6\",\"statement\":\"explanations incorrect\"}}",
            "{\"letter\":\"B\",\"attribute\":\"explanation consistency\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"table 6\",\"statement\":\"explanations differ\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"images\",\"target\":\"table_6\",\"other_involved\":\"text\",\"action\":\"reposition\",\"edit_statement\":\"exchange images\",\"reason\":\"swapped\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"image_quality\",\"target\":\"table_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"improve clarity\",\"reason\":\"blurry\"}",
            "{\"letter\":\"D\",\"attribute\":\"model_interpretation\",\"target\":\"table_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"correct meme interpretation\",\"reason\":\"wrong\"}",
            "{\"letter\":\"B\",\"attribute\":\"explanation\",\"target\":\"table_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align description\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 6"]
    }
  ],
  "gN4stDLq3t": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "gN4stDLq3t_8_18f44290",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.10727974025682473,
            "width": 0.6636904761904762,
            "height": 0.2827586206896552
          }
        }
      ],
      "review_text": "Fig 6: The caption says it is about the a hyperparameter but the plot appears to be for the b hyperparameter, since the values are all negative.",
      "category": "figure-caption",
      "description": "The caption talks about hyperparameter a, whereas the figure shows b",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"axis label\",\"claim\":{\"source\":\"caption\",\"statement\":\"mentions optimal a\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"y-axis labeled as b\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"optimal a\",\"claim\":{\"source\":\"caption\",\"statement\":\"consistent across tokens\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"noticeable variation in b\"}}",
            "{\"letter\":\"C\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"all values visible\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"not all values visible\"}}",
            "{\"letter\":\"B\",\"attribute\":\"metric\",\"claim\":{\"source\":\"expectation\",\"statement\":\"different metrics\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"x-axis and y-axis use same metric\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"label\",\"target\":\"figure_6_yaxis\",\"other_involved\":\"figure_6_caption\",\"action\":\"modify\",\"edit_statement\":\"match\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"consistency\",\"target\":\"figure_6\",\"other_involved\":\"figure_6_caption\",\"action\":\"modify\",\"edit_statement\":\"reflect variation\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"legend_values\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"missing\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"B\",\"attribute\":\"metric\",\"target\":\"figure_6_xaxis\",\"other_involved\":\"figure_6_yaxis\",\"action\":\"modify\",\"edit_statement\":\"distinguish\",\"reason\":\"same\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "gDWkImLIKd": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "gDWkImLIKd_7_82cc9319",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.1040612933279454,
            "width": 0.681547619047619,
            "height": 0.2183908045977012
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "With the gold test patch as a reference, we predict executability of all editing lo-\ncations with an accuracy of 91.6%, aggregating which, we can predict the build\nstatus in 82.1% of the instances in SWE-bench",
          "line": 19
        }
      ],
      "review_text": "Abstract: The authors claim an accuracy of 91.6% at the micro-level and 82.1% at the macro-level, but these are actually F1 scores as per Tables 1 and 2.",
      "category": "figure-text",
      "description": "Abstract text talks about accuracy, but table shows the F1-score",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "With the gold test patch as a reference, we predict executability of all editing lo-\ncations with an accuracy of 91.6%, aggregating which, we can predict the build\nstatus in 82.1% of the instances in SWE-bench",
          "correct": "gDWkImLIKd_7_82cc9319",
          "incorrect": [
            "gDWkImLIKd_7_table_table3",
            "gDWkImLIKd_7_table_table4",
            "gDWkImLIKd_8_table_table5"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"metric\",\"claim\":{\"source\":\"text\",\"statement\":\"accuracy measure\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"F1-Score\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"metric\",\"claim\":{\"source\":\"text\",\"statement\":\"accuracy for executability\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"build status prediction\"}}",
            "{\"letter\":\"B\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"text\",\"statement\":\"91.6% and 82.1%\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"71.4%\"}}",
            "{\"letter\":\"C\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be highlighted\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"not highlighted\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"metric\",\"target\":\"text\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"make F1-Score\",\"reason\":\"is shown as accuracy score\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"predicting executability\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add 91.6%\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"highest reported accuracy\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"make the same\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"value\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"highlight 82.1%\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "gCYFtUKXSc": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "gCYFtUKXSc_9_6cee1d9b",
          "bbox": {
            "x": 0.20208331516810824,
            "y": 0.0939464349856322,
            "width": 0.6339285714285714,
            "height": 0.24367816091954023
          }
        }
      ],
      "review_text": "Table 1: There is an inconsistency of Notations in Table 1. (CF100 and CIFAR100)",
      "category": "table-only",
      "description": "Inconsistency in labelling (CIFAR100 <-> CF100)",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"abbreviation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent abbreviation\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"different abbreviations\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"column\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent columns\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"inconsistent columns\"}}",
            "{\"letter\":\"D\",\"attribute\":\"highlight\",\"claim\":{\"source\":\"expectation\",\"statement\":\"highlight best method\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not highlighted\"}}",
            "{\"letter\":\"C\",\"attribute\":\"performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"outperform baselines\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"worse than baselines\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"CIFAR100 abbreviation\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"update abbreviation\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"TinyImg columns\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"adjust columns\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"D\",\"attribute\":\"best performing method\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"highlight best\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"Relearn (ours) performance\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"explain performance\",\"reason\":\"underperforming\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "g6iiIUvhko": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "g6iiIUvhko_9_732cdf01",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.09302683863146552,
            "width": 0.6726190476190476,
            "height": 0.3057471264367816
          }
        },
        {
          "type": "image",
          "page": 9,
          "image_id": "g6iiIUvhko_9_993fa180",
          "bbox": {
            "x": 0.16636902945382254,
            "y": 0.5586589944773708,
            "width": 0.6785714285714286,
            "height": 0.2873563218390805
          }
        }
      ],
      "review_text": "Table 2: The return of 'Ours' on ML-45's Test tasks is $2911.7 \\pm 105.1$, which contradicts the result in Table 1, Episode 2 where it is $2893.3 \\pm 107.5$.",
      "category": "table-table",
      "description": "The 'Ours' result for ML-45's Test tasks does not match between the tables",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "g6iiIUvhko_9_732cdf01",
          "correct": "g6iiIUvhko_9_993fa180",
          "incorrect": [
            "g6iiIUvhko_8_table_table1",
            "g6iiIUvhko_14_table_table3",
            "g6iiIUvhko_15_table_table5"
          ],
          "letters": ["B", "D", "C", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"Return value\",\"claim\":{\"source\":\"table_1\",\"statement\":\"Ours ML-45 Episode 2\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"Ours ML-45 Test\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Success Rate\",\"claim\":{\"source\":\"table_1\",\"statement\":\"Ours ML-45 Episode 2\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"Ours ML-45 Test\"}}",
            "{\"letter\":\"C\",\"attribute\":\"Return value\",\"claim\":{\"source\":\"table_1\",\"statement\":\"Ours ML-10 Episode 2\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"Ours ML-10 Test\"}}",
            "{\"letter\":\"A\",\"attribute\":\"Return value\",\"claim\":{\"source\":\"table_1\",\"statement\":\"LDM ML-45 Episode 2\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"LDM ML-45 Test\"}}"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"Return value\",\"target\":\"table_1\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"match table_2\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Success Rate\",\"target\":\"table_1\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"match table_2\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"C\",\"attribute\":\"Return value\",\"target\":\"table_1\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"match table_2\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"A\",\"attribute\":\"Return value\",\"target\":\"table_1\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"match table_2\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["D", "B", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1", "Table 2"]
    }
  ],
  "exfy4e7OJq": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "exfy4e7OJq_2_56fce9bc",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.1022222409303161,
            "width": 0.6696428571428571,
            "height": 0.2574712643678161
          }
        }
      ],
      "review_text": "Figure 1(a) and 1(b): The y-axes are inconsistent. The minimum value in Figure 1(a) is 2, while it should be -2 according to Figure 1(b).",
      "category": "figure-only",
      "description": "The y-axes do not align between the two subplots, making comparison hard",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"y-axis alignment\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be aligned\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"different y-axis starts\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"neuron activation\",\"claim\":{\"source\":\"caption\",\"statement\":\"activated by Python\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"activations for other languages\"}}",
            "{\"letter\":\"B\",\"attribute\":\"scale type\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"sub-plots\",\"statement\":\"different scale types\"}}",
            "{\"letter\":\"C\",\"attribute\":\"font style\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure descriptions\",\"statement\":\"inconsistent font styles\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"y-axis scale\",\"target\":\"figure_1a\",\"other_involved\":\"figure_1b\",\"action\":\"modify\",\"edit_statement\":\"align y-axis scale\",\"reason\":\"misaligned\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"activations\",\"target\":\"caption\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"align neuron activations\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"scale\",\"target\":\"figure_1a\",\"other_involved\":\"figure_1b\",\"action\":\"modify\",\"edit_statement\":\"align scale type\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"font styles\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align font styles\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "erowpbZcPi": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "erowpbZcPi_6_d1c2f49a",
          "bbox": {
            "x": 0.20803569612048922,
            "y": 0.08498086161997127,
            "width": 0.5982142857142857,
            "height": 0.1103448275862069
          }
        },
        {
          "type": "text",
          "page": 10,
          "content": " While for the synthetic dataset the non-tilted case, which correspond to ERM, has the worst performance compared to tilted cases, thus both positive and negative tilted parameters boost the performance of QNN classification tasks.",
          "line": 508
        }
      ],
      "review_text": "Table 1: The claim that both positive and negative tilt temperatures lead to an improvement seems to be at odds with the results in Table 1.",
      "category": "table-text",
      "description": "Negative tilting does not seem to improve the performance according to the table, but the text claims an improvement",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": " While for the synthetic dataset the non-tilted case, which correspond to ERM, has the worst performance compared to tilted cases, thus both positive and negative tilted parameters boost the performance of QNN classification tasks.",
          "correct": "erowpbZcPi_6_d1c2f49a",
          "incorrect": [
            "erowpbZcPi_6_image_figure3",
            "erowpbZcPi_6_interline-equation_equation24",
            "erowpbZcPi_6_interline-equation_equation30"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"tilt parameter\",\"claim\":{\"source\":\"text\",\"statement\":\"boost performance\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"negative tilt doesn't improve\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"tilt parameter\",\"claim\":{\"source\":\"expectation\",\"statement\":\"negative tilt doesn't improve\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"negative tilt improves\"}}",
            "{\"letter\":\"D\",\"attribute\":\"tilt parameter\",\"claim\":{\"source\":\"expectation\",\"statement\":\"tilted is better than non-tilted\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"non-tilted is best\"}}",
            "{\"letter\":\"A\",\"attribute\":\"tilt parameter\",\"claim\":{\"source\":\"text\",\"statement\":\"improve performance\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"only positive tilt improves\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"negative tilt hyperparameter\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align performance improvement\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"tilt parameters\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align performance improvement\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"performance\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align ERM performance claims\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"A\",\"attribute\":\"tilt parameters\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align performance improvement\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "ec9hJPn59o": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "ec9hJPn59o_3_1c517e6e",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.6100383188532688,
            "width": 0.6755952380952381,
            "height": 0.3149425287356322
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "In this section, we take a low-light RGB image $I \\in \\mathbb{R}^{W \\times H \\times 3}$ as input. A reflection convolutional operator **RefConv** (an regular convolutional operator with reflection padding) is employed on $I$ to generate a low-resolution image $I_l \\in \\mathbb{R}^{\\frac{H}{8} \\times \\frac{W}{8} \\times 3}$. Then, the Feature Aggregation Module (FAM) is utilized to transform $I$ and $I_l$ into low-level features $F_l \\in \\mathbb{R}^{W \\times H \\times C}$ and high-level features $f_h \\in \\mathbb{R}^{\\frac{H}{8} \\times \\frac{W}{8} \\times C}$. Subsequently, the High-level Feature Enhancement Module (HFEM) processes to generate richer high-level features $F_f \\in \\mathbb{R}^{\\frac{H}{8} \\times \\frac{W}{8} \\times C}$.\n",
          "line": 169
        }
      ],
      "review_text": "Lines 173-179: The description of the proposed module does not align with the corresponding Figure 2.",
      "category": "figure-text",
      "description": "The figure lacks any reference to **RefConv** from the text",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "In this section, we take a low-light RGB image $I \\in \\mathbb{R}^{W \\times H \\times 3}$ as input. A reflection convolutional operator **RefConv** (an regular convolutional operator with reflection padding) is employed on $I$ to generate a low-resolution image $I_l \\in \\mathbb{R}^{\\frac{H}{8} \\times \\frac{W}{8} \\times 3}$. Then, the Feature Aggregation Module (FAM) is utilized to transform $I$ and $I_l$ into low-level features $F_l \\in \\mathbb{R}^{W \\times H \\times C}$ and high-level features $f_h \\in \\mathbb{R}^{\\frac{H}{8} \\times \\frac{W}{8} \\times C}$. Subsequently, the High-level Feature Enhancement Module (HFEM) processes to generate richer high-level features $F_f \\in \\mathbb{R}^{\\frac{H}{8} \\times \\frac{W}{8} \\times C}$.\n",
          "correct": "ec9hJPn59o_3_1c517e6e",
          "incorrect": [
            "ec9hJPn59o_3_image_figure3",
            "ec9hJPn59o_1_image_figure1",
            "ec9hJPn59o_6_image_figure5"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"RefConv operator\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be shown\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"not shown\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"FAM output\",\"claim\":{\"source\":\"text\",\"statement\":\"two outputs\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"one output\"}}",
            "{\"letter\":\"A\",\"attribute\":\"spatial dimensions\",\"claim\":{\"source\":\"text\",\"statement\":\"different dimensions\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"same dimensions\"}}",
            "{\"letter\":\"C\",\"attribute\":\"Ff generation\",\"claim\":{\"source\":\"text\",\"statement\":\"generated by HFEM\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"input to HFEM\"}}"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"RefConv operator\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"RefConv operator\",\"reason\":\"omission\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"output\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"second output\",\"reason\":\"missing output\"}",
            "{\"letter\":\"A\",\"attribute\":\"spatial dimensions\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"image size\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"Ff\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"Ff direction\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["B", "D", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "ec9hJPn59o_3_c50cd381",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.61187744140625,
            "width": 0.6755952380952381,
            "height": 0.30344827586206896
          }
        },
        {
          "type": "image",
          "page": 4,
          "image_id": "ec9hJPn59o_4_04dc1289",
          "bbox": {
            "x": 0.2854166485014416,
            "y": 0.478314138829023,
            "width": 0.4375,
            "height": 0.2942528735632184
          }
        }
      ],
      "review_text": "Figure 3: The output of (a) should be the input of (d), but the dimensions are labelled differently.",
      "category": "figure-figure",
      "description": "According to Figure 2, the output of FAM should be the input to HFEM, but the dimensions do not match in Figure 3",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first figure consistent with the content of the second figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first figure inconsistent with the content of the second figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "ec9hJPn59o_3_c50cd381",
          "correct": "ec9hJPn59o_4_04dc1289",
          "incorrect": [
            "ec9hJPn59o_6_image_figure5",
            "ec9hJPn59o_6_image_figure4",
            "ec9hJPn59o_2_image_figure2"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"dimensional consistency\",\"claim\":{\"source\":\"figure_2\",\"statement\":\"FAM output to HFEM\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"dimensional mismatch\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"channel count\",\"claim\":{\"source\":\"figure_3d\",\"statement\":\"32 channels\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"24 channels\"}}",
            "{\"letter\":\"D\",\"attribute\":\"input dimensions\",\"claim\":{\"source\":\"figure_3a\",\"statement\":\"H*W*24\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"H*W*4\"}}",
            "{\"letter\":\"C\",\"attribute\":\"output dimensions\",\"claim\":{\"source\":\"figure_2\",\"statement\":\"H*W*3\"},\"evidence\":{\"source\":\"figure_3d\",\"statement\":\"H*W*32\"}}"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"FAM output\",\"target\":\"figure_2\",\"other_involved\":\"figure_3, HFEM input\",\"action\":\"modify\",\"edit_statement\":\"FAM output dimension\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"HFEM output\",\"target\":\"figure_3d\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"HFEM output channels\",\"reason\":\"contradicts\"}",
            "{\"letter\":\"D\",\"attribute\":\"FAM input\",\"target\":\"figure_3a\",\"other_involved\":\"figure_2, input I\",\"action\":\"modify\",\"edit_statement\":\"FAM input channels\",\"reason\":\"contradicts\"}",
            "{\"letter\":\"C\",\"attribute\":\"HFEM output (channels)\",\"target\":\"figure_2\",\"other_involved\":\"figure_3d, Enhanced Image\",\"action\":\"modify\",\"edit_statement\":\"HFEM output channels\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["B", "A", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2", "Figure 3"]
    }
  ],
  "eFGIWUqHQm": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "eFGIWUqHQm_5_974b12fc",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.18532567517510778,
            "width": 0.6726190476190476,
            "height": 0.44367816091954027
          }
        }
      ],
      "review_text": "Figure 3: The term 'MediaPop' in the caption should be corrected to 'MediaPipe'",
      "category": "figure-caption",
      "description": "MediaPop used in caption, but figure itself only shows MediaPipe, indicating a typo",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"model name\",\"claim\":{\"source\":\"caption part (a)\",\"statement\":\"MediaPop\"},\"evidence\":{\"source\":\"Figure 3(a) and caption part (c)\",\"statement\":\"MediaPipe\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"label\",\"claim\":{\"source\":\"caption\",\"statement\":\"initial two layers of DenseNet121\"},\"evidence\":{\"source\":\"Figure 3(b)\",\"statement\":\"Dense block 1 and Dense block 2\"}}",
            "{\"letter\":\"C\",\"attribute\":\"input\",\"claim\":{\"source\":\"caption\",\"statement\":\"facial graph\"},\"evidence\":{\"source\":\"Figure 3(a)\",\"statement\":\"image of face\"}}",
            "{\"letter\":\"B\",\"attribute\":\"depiction\",\"claim\":{\"source\":\"caption\",\"statement\":\"low-level features related to textures\"},\"evidence\":{\"source\":\"Figure 3(a)\",\"statement\":\"doesn't depict features\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"processing step\",\"target\":\"caption\",\"other_involved\":\"figure_3a, caption\",\"action\":\"replace\",\"edit_statement\":\"MediaPop with MediaPipe\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"layers\",\"target\":\"figure_3b\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"name consistency\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"input\",\"target\":\"figure_3a\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"input consistency\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"features\",\"target\":\"figure_3a\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"feature depiction\",\"reason\":\"missing\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "dsALpkd1OU": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "dsALpkd1OU_6_772c8fb0",
          "bbox": {
            "x": 0.22886902945382254,
            "y": 0.3325670965786638,
            "width": 0.556547619047619,
            "height": 0.09885057471264369
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "We conducted experiments on SWE-bench and improved the resolution rate by approximately 27.3%, demonstrating the potential of this method.",
          "line": 25
        }
      ],
      "review_text": "Table 2: The abstract mentions a 27% improvement over baseline, but the table shows only a 6% improvement.",
      "category": "table-text",
      "description": "The table shows a smaller improvement over the baseline than the abstract claims",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "We conducted experiments on SWE-bench and improved the resolution rate by approximately 27.3%, demonstrating the potential of this method.",
          "correct": "dsALpkd1OU_6_772c8fb0",
          "incorrect": [
            "dsALpkd1OU_6_table_table3",
            "dsALpkd1OU_3_interline-equation_equation9.5",
            "dsALpkd1OU_2_image_figure2"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"improvement value\",\"claim\":{\"source\":\"text\",\"statement\":\"27.3% improvement\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"less than 27.3%\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"improvement percentage\",\"claim\":{\"source\":\"expectation\",\"statement\":\"inconsistent\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"27.3%\"}}",
            "{\"letter\":\"B\",\"attribute\":\"resolution rate\",\"claim\":{\"source\":\"text\",\"statement\":\"27.3% improvement\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"lower than 27.3%\"}}",
            "{\"letter\":\"C\",\"attribute\":\"benchmark used\",\"claim\":{\"source\":\"expectation\",\"statement\":\"SWE-bench\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"SME-bench\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"resolution rate\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align increase value\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"resolution rate\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"none\",\"edit_statement\":\"align reduce value\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"resolution rate\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"remove value\",\"reason\":\"duplicate\"}",
            "{\"letter\":\"C\",\"attribute\":\"benchmark\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align benchmark name\",\"reason\":\"different\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "dL3h1lyUNd": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "dL3h1lyUNd_7_ec972fd3",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.5202681618175288,
            "width": 0.6755952380952381,
            "height": 0.26436781609195403
          }
        }
      ],
      "review_text": "Table 1: The reviewer points out a discrepancy in energy consumption values (7.56 vs 7.59) for identical configurations.",
      "category": "table-only",
      "description": "The Energy value for VGG16 MAD SMSB is not the same across all subtables",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"Energy (mJ) value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent reporting\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"inconsistent reporting\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"missing values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"values provided\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"values missing\"}}",
            "{\"letter\":\"B\",\"attribute\":\"VGG backbone configurations\",\"claim\":{\"source\":\"expectation\",\"statement\":\"valid configurations\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"invalid configurations\"}}",
            "{\"letter\":\"C\",\"attribute\":\"worst results\",\"claim\":{\"source\":\"expectation\",\"statement\":\"not bolded\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"bolded\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"Energy (mJ)\",\"target\":\"table_1\",\"other_involved\":\"VGG16 backbone, MAD, SMSB\",\"action\":\"modify\",\"edit_statement\":\"update value\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"missing values\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"provide missing values\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"B\",\"attribute\":\"VGG backbone configurations\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"remove\",\"edit_statement\":\"non-existent configurations\",\"reason\":\"do not exist\"}",
            "{\"letter\":\"C\",\"attribute\":\"bolding\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"bold best results\",\"reason\":\"worst results bolded\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "dIK7GpOwNY": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "dIK7GpOwNY_6_a784c94f",
          "bbox": {
            "x": 0.16636902945382254,
            "y": 0.09659002896012932,
            "width": 0.6696428571428571,
            "height": 0.5770114942528736
          }
        },
        {
          "type": "text",
          "page": 5,
          "content": "Across all datasets (ImageNet, CIFAR100, CIFAR10), there is a general negative cor-\nrelation between effective dimensionality and relative performance. Models with higher effective\ndimensionality tend to suffer more under AutoAttack, especially on ImageNet and CIFAR100. The\nmain outliers to the general trends are ResNet (ImageNet) and VGG (CIFAR), much like in Sec-\ntion 4.1",
          "line": 237
        }
      ],
      "review_text": "Figure 2: The trend shown in the figure contradicts the claim made by the authors that adversarial robustness and effective dimension are negatively correlated. Most models show the opposite trend or no correlation.",
      "category": "figure-text",
      "description": "It is hard to claim a clear general negative correlation based on the plots, which are rather ambiguous",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Across all datasets (ImageNet, CIFAR100, CIFAR10), there is a general negative cor-\nrelation between effective dimensionality and relative performance. Models with higher effective\ndimensionality tend to suffer more under AutoAttack, especially on ImageNet and CIFAR100. The\nmain outliers to the general trends are ResNet (ImageNet) and VGG (CIFAR), much like in Sec-\ntion 4.1",
          "correct": "dIK7GpOwNY_6_a784c94f",
          "incorrect": [
            "dIK7GpOwNY_6_image_figure4",
            "dIK7GpOwNY_6_image_figure3",
            "dIK7GpOwNY_3_image_figure1"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"correlation\",\"claim\":{\"source\":\"text\",\"statement\":\"negative correlation\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"ambiguous trends\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"correlation\",\"claim\":{\"source\":\"text\",\"statement\":\"negative correlation\"},\"evidence\":{\"source\":\"plots\",\"statement\":\"positive correlation\"}}",
            "{\"letter\":\"D\",\"attribute\":\"models\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent models\"},\"evidence\":{\"source\":\"subplots\",\"statement\":\"inconsistent models\"}}",
            "{\"letter\":\"A\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"match plot lines\"},\"evidence\":{\"source\":\"plots\",\"statement\":\"don't match\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"trends\",\"target\":\"plot\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align description\",\"reason\":\"unsubstantiated\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"correlation\",\"target\":\"plot\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align description\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"models\",\"target\":\"subplot\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align models\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"symbols\",\"target\":\"legend\",\"other_involved\":\"plot\",\"action\":\"modify\",\"edit_statement\":\"match lines\",\"reason\":\"different\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "cp9LvuvAKW": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "cp9LvuvAKW_6_8e9e11a2",
          "bbox": {
            "x": 0.14553569612048922,
            "y": 0.5608429305854885,
            "width": 0.7172619047619048,
            "height": 0.2160919540229885
          }
        }
      ],
      "review_text": "Figure 3: The use of a non-consistent scale for the x-axis makes it confusing to compare the data, which could lead to misinterpretation of the results.",
      "category": "figure-only",
      "description": "The y-axis scaling for subplots (b) and (c) is different, making them harder to compare",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"y-axis scaling\",\"claim\":{\"source\":\"expectation\",\"statement\":\"same scaling\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"different scaling\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"legends\",\"claim\":{\"source\":\"expectation\",\"statement\":\"specify model versions\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"incomplete legends\"}}",
            "{\"letter\":\"C\",\"attribute\":\"y-axis scales\",\"claim\":{\"source\":\"expectation\",\"statement\":\"same scale\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"different scales\"}}",
            "{\"letter\":\"B\",\"attribute\":\"x-axis\",\"claim\":{\"source\":\"expectation\",\"statement\":\"logarithmic scale\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"not logarithmic\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"y-axis scaling\",\"target\":\"figure_3b\",\"other_involved\":\"figure_3c\",\"action\":\"modify\",\"edit_statement\":\"align scale\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"legends\",\"target\":\"figure_3b\",\"other_involved\":\"figure_3d,figure_3a,figure_3c\",\"action\":\"add\",\"edit_statement\":\"model version\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"y-axis scale\",\"target\":\"figure_3a\",\"other_involved\":\"figure_3d\",\"action\":\"modify\",\"edit_statement\":\"align scale\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"data scale labels\",\"target\":\"figure_3\",\"other_involved\":\"x-axis\",\"action\":\"modify\",\"edit_statement\":\"consistent spacing\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "cnLNpIRPuF": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "cnLNpIRPuF_9_371b0182",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.08750954112787356,
            "width": 0.6726190476190476,
            "height": 0.7195402298850574
          }
        }
      ],
      "review_text": "Fig. 6: The color-coding is hard to differentiate, hampering accessibility, and there is a large generation artifact in the form of an entirely lava chunk that remains entirely unaddressed in the paper.",
      "category": "figure-only",
      "description": "There is a large lava chunk on the map, which is not addressed in the caption",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"lava chunk\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be accurate\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"error in generation\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"colored borders\",\"claim\":{\"source\":\"caption\",\"statement\":\"indicate screenshot locations\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"lack corresponding borders\"}}",
            "{\"letter\":\"C\",\"attribute\":\"colored squares\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"inconsistent between images\"}}",
            "{\"letter\":\"A\",\"attribute\":\"image description\",\"claim\":{\"source\":\"caption\",\"statement\":\"scaled down pixel map\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"high-resolution photograph\"}}"
          ],
          "letters": ["B", "D", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"orange square depiction\",\"target\":\"figure_6_top\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"explain error\",\"reason\":\"unexplained\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"colored borders\",\"target\":\"figure_6_bottom\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"add borders\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"number of colored squares\",\"target\":\"figure_6_top\",\"other_involved\":\"figure_6_bottom\",\"action\":\"modify\",\"edit_statement\":\"match count\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"image type\",\"target\":\"figure_6_top\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"update description\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["B", "D", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "cb4PoT7ePW": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "cb4PoT7ePW_2_a7fb6379",
          "bbox": {
            "x": 0.13958331516810824,
            "y": 0.1976245397808908,
            "width": 0.7202380952380952,
            "height": 0.3632183908045977
          }
        }
      ],
      "review_text": "Figure 1: The DoLa method doesn\u2019t include any context encoder, but the figure shows that the context encoder is part of DoLa, which is not true. You should add a title to the figure such as \u2018Context Encoder + DoLa\u2019",
      "category": "figure-only",
      "description": "DoLa does not include a dedicated context encoder, which makes the left part of the figure show the wrong architecture",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"diagram\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't include Context Encoder\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"includes Context Encoder\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"answer\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be correct\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"outputs wrong answer\"}}",
            "{\"letter\":\"C\",\"attribute\":\"approaches\",\"claim\":{\"source\":\"caption\",\"statement\":\"visually distinguishable\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"visually indistinguishable\"}}",
            "{\"letter\":\"B\",\"attribute\":\"pathways\",\"claim\":{\"source\":\"caption\",\"statement\":\"connected to layers\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"points into nothing\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"Context Encoder\",\"target\":\"figure_1_left\",\"other_involved\":\"DoLa method\",\"action\":\"remove\",\"edit_statement\":\"block\",\"reason\":\"not utilize\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"answer\",\"target\":\"figure_1_right\",\"other_involved\":\"User question\",\"action\":\"modify\",\"edit_statement\":\"correct output\",\"reason\":\"wrong answer\"}",
            "{\"letter\":\"C\",\"attribute\":\"visuals\",\"target\":\"figure_1\",\"other_involved\":\"two approaches\",\"action\":\"modify\",\"edit_statement\":\"distinguish representation\",\"reason\":\"indistinguishable\"}",
            "{\"letter\":\"B\",\"attribute\":\"arrow\",\"target\":\"figure_1_right\",\"other_involved\":\"layer 24\",\"action\":\"modify\",\"edit_statement\":\"fix direction\",\"reason\":\"points nothing\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 1"]
    }
  ],
  "cPIs6PlCuE": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "cPIs6PlCuE_5_97433c41",
          "bbox": {
            "x": 0.5056547437395368,
            "y": 0.6962452154049928,
            "width": 0.3392857142857143,
            "height": 0.1793103448275862
          }
        },
        {
          "type": "text",
          "page": 6,
          "content": "As illustrated in Equation 7, we identify channels with significantly large deviations in LC values and subsequently set all output values of these channels to zero. The blue portion in Figure 2 corresponds to these severely biased parameters.",
          "line": 336
        }
      ],
      "review_text": "Figure 2: The blue portion in Figure 2 (Line 338) is confusing as all of them are blue.",
      "category": "figure-text",
      "description": "All parameters are blue, questioning the \"blue portion\" mentioned in the text",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "As illustrated in Equation 7, we identify channels with significantly large deviations in LC values and subsequently set all output values of these channels to zero. The blue portion in Figure 2 corresponds to these severely biased parameters.",
          "correct": "cPIs6PlCuE_5_97433c41",
          "incorrect": [
            "cPIs6PlCuE_5_image_figure3",
            "cPIs6PlCuE_1_image_figure1",
            "cPIs6PlCuE_14_image_figure5"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"color\",\"claim\":{\"source\":\"text\",\"statement\":\"blue portion is biased parameters\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"Wj is blue\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"color\",\"claim\":{\"source\":\"text\",\"statement\":\"blue portion is parameters\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"x cube is blue\"}}",
            "{\"letter\":\"C\",\"attribute\":\"color\",\"claim\":{\"source\":\"text\",\"statement\":\"blue portion is parameters\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"Wj is purple\"}}",
            "{\"letter\":\"B\",\"attribute\":\"calculation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should show dot product\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"no dot product\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"color\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"only mark blue portion\",\"reason\":\"whole kernel blue\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"color\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"only mark blue portion\",\"reason\":\"x cube blue\"}",
            "{\"letter\":\"C\",\"attribute\":\"color\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"make color blue\",\"reason\":\"color is purple\"}",
            "{\"letter\":\"B\",\"attribute\":\"equation\",\"target\":\"figure_2\",\"other_involved\":\"figure_2\",\"action\":\"add\",\"edit_statement\":\"dot product calculation\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "bx0IbCcBvO": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "bx0IbCcBvO_2_5eee25ea",
          "bbox": {
            "x": 0.16636902945382254,
            "y": 0.08957856276939656,
            "width": 0.681547619047619,
            "height": 0.2827586206896552
          }
        }
      ],
      "review_text": "Figure 1(a), (b) and (c): The differences are not visually apparent due to the dominance of dark blue areas, contradicting the intended comparison.",
      "category": "figure-only",
      "description": "The sparse patterns can't be seen in the figure due to the colormap",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"colormap\",\"claim\":{\"source\":\"expectation\",\"statement\":\"discern sparse patterns\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"hard to discern\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"sparse patterns\",\"claim\":{\"source\":\"caption\",\"statement\":\"sparse patterns\"},\"evidence\":{\"source\":\"attention maps\",\"statement\":\"not sparse\"}}",
            "{\"letter\":\"C\",\"attribute\":\"colormap scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"darker is higher\"},\"evidence\":{\"source\":\"colormap\",\"statement\":\"lighter is lower\"}}",
            "{\"letter\":\"B\",\"attribute\":\"dimensions\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"subfigures\",\"statement\":\"vary\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"colormap\",\"target\":\"figure_1\",\"other_involved\":\"attention maps\",\"action\":\"replace\",\"edit_statement\":\"visibility sparse patterns\",\"reason\":\"difficult discern\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"attention maps\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"add sparse patterns\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"numerical scale\",\"target\":\"figure_1\",\"other_involved\":\"colormap\",\"action\":\"replace\",\"edit_statement\":\"reverse scale\",\"reason\":\"reversed\"}",
            "{\"letter\":\"B\",\"attribute\":\"dimensions\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align dimensions\",\"reason\":\"variation\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "b7HOhqXiZs": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "b7HOhqXiZs_6_c7c9b6fa",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.09164751425556752,
            "width": 0.6755952380952381,
            "height": 0.34942528735632183
          }
        }
      ],
      "review_text": "Figure 1 and Figure 2: The number of training steps used is not clear. Figure 1 suggests at least 20000 steps, but none of the runs in Figure 2 show convergence, implying that training longer could have changed the final results.",
      "category": "figure-caption",
      "description": "None of the lines show convergence as implied by the caption",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"convergence\",\"claim\":{\"source\":\"caption\",\"statement\":\"convergence\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"downward trend\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"hyperparameter count\",\"claim\":{\"source\":\"title\",\"statement\":\"different counts\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"same data\"}}",
            "{\"letter\":\"C\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"specify k value\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"does not specify\"}}",
            "{\"letter\":\"D\",\"attribute\":\"training steps\",\"claim\":{\"source\":\"expectation\",\"statement\":\"same steps\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"different steps\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"convergence\",\"target\":\"figure_1_caption\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"remove claim\",\"reason\":\"not achieved\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"data\",\"target\":\"figure_1\",\"other_involved\":\"figure_1_title\",\"action\":\"modify\",\"edit_statement\":\"update title\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"C\",\"attribute\":\"algorithm\",\"target\":\"figure_1_legend\",\"other_involved\":\"figure_1\",\"action\":\"add\",\"edit_statement\":\"reference value\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"steps\",\"target\":\"figure_1a\",\"other_involved\":\"figure_1b\",\"action\":\"modify\",\"edit_statement\":\"align steps\",\"reason\":\"different\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "b39J2X4rjT": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "b39J2X4rjT_9_fe2b77d1",
          "bbox": {
            "x": 0.4937499818347749,
            "y": 0.09877396506824714,
            "width": 0.33035714285714285,
            "height": 0.13333333333333333
          }
        },
        {
          "type": "text",
          "page": 9,
          "content": "To explore the impact of CBAs at two different levels, we conduct\nablation experiments as depicted in Tab. 4. In the absence of CBA, we use pruning-based addition\nas a substitute. The results indicate that the CBA at level 2 has negligible effects on the 3DVG\ntask. This is primarily because the CBA at level 2 mainly serves to supplement the scene-level\nTGP, which is tasked with pruning the background\u2014a relatively straightforward process. Moreover,\nalthough some target features are pruned, they are compensated by two subsequent generative sparse\nconvolutions. However, the CBA at level 1 enhances performance by adapt completion for the\ntarget-level TGP. It is challenging for the target-level TGP to fully preserve target objects through\nupsampling features, especially for smaller or narrower targets. The CBA at level 1, based on high-\nresolution backbone features, effectively complements the TGP",
          "line": 442
        }
      ],
      "review_text": "Table 4 and the ablation study paragraph starting Line 442: The authors stated that CBA at level 2 has negligible effects while CBA at level 1 is more impactful for improving accuracy. But Table 4 indicates the opposite: CBA at level 2 alone provides the best performance boost, much bigger than CBA at level 1, and even more than having both level 1 and level 2.",
      "category": "table-text",
      "description": "The table shows level 2 CBA provides best accuracy (even better than level 1 and level 2 combined), but text mentions negligible effects",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "To explore the impact of CBAs at two different levels, we conduct\nablation experiments as depicted in Tab. 4. In the absence of CBA, we use pruning-based addition\nas a substitute. The results indicate that the CBA at level 2 has negligible effects on the 3DVG\ntask. This is primarily because the CBA at level 2 mainly serves to supplement the scene-level\nTGP, which is tasked with pruning the background\u2014a relatively straightforward process. Moreover,\nalthough some target features are pruned, they are compensated by two subsequent generative sparse\nconvolutions. However, the CBA at level 1 enhances performance by adapt completion for the\ntarget-level TGP. It is challenging for the target-level TGP to fully preserve target objects through\nupsampling features, especially for smaller or narrower targets. The CBA at level 1, based on high-\nresolution backbone features, effectively complements the TGP",
          "correct": "b39J2X4rjT_9_fe2b77d1",
          "incorrect": [
            "b39J2X4rjT_8_table_table3",
            "b39J2X4rjT_8_table_table5",
            "b39J2X4rjT_7_table_table2"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"CBA level 2 effects\",\"claim\":{\"source\":\"text\",\"statement\":\"negligible effects\"},\"evidence\":{\"source\":\"Table 4\",\"statement\":\"highest accuracy\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"CBA level 1 effect\",\"claim\":{\"source\":\"text\",\"statement\":\"enhances performance\"},\"evidence\":{\"source\":\"Table 4\",\"statement\":\"lower accuracy\"}}",
            "{\"letter\":\"D\",\"attribute\":\"CBA level 2 effects\",\"claim\":{\"source\":\"text\",\"statement\":\"negligible effects\"},\"evidence\":{\"source\":\"Table 4\",\"statement\":\"detrimental effect\"}}",
            "{\"letter\":\"B\",\"attribute\":\"CBA level 2 role\",\"claim\":{\"source\":\"text\",\"statement\":\"pruning background\"},\"evidence\":{\"source\":\"Table 4\",\"statement\":\"opposite\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"CBA (level 2) effects\",\"target\":\"text\",\"other_involved\":\"Table 4\",\"action\":\"modify\",\"edit_statement\":\"align with highest accuracy\",\"reason\":\"contradictory\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"CBA (level 1) effects\",\"target\":\"text\",\"other_involved\":\"Table 4\",\"action\":\"modify\",\"edit_statement\":\"align performance with Table\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"D\",\"attribute\":\"CBA (level 2) effects\",\"target\":\"text\",\"other_involved\":\"Table 4\",\"action\":\"modify\",\"edit_statement\":\"align with detrimental effect\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"B\",\"attribute\":\"CBA (level 2) effects\",\"target\":\"text\",\"other_involved\":\"Table 4\",\"action\":\"modify\",\"edit_statement\":\"align role with Table 4\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 4"]
    }
  ],
  "aoW5Sm8Op8": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "aoW5Sm8Op8_3_7fa7511c",
          "bbox": {
            "x": 0.17827379135858443,
            "y": 0.09049808896821121,
            "width": 0.6547619047619048,
            "height": 0.15402298850574714
          }
        }
      ],
      "review_text": "Figure 1a, b and c are identical. Is it an error? the legend or the text should contain a description of the variables, and a short hint about how each bias occurs in those cases",
      "category": "figure-caption",
      "description": "All three subplots are exactly the same just with different caption",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"causal diagrams\",\"claim\":{\"source\":\"caption\",\"statement\":\"different biases\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"visually identical\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"red lines\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 1 subplots (a) and (b)\",\"statement\":\"different notation\"}}",
            "{\"letter\":\"A\",\"attribute\":\"arrow direction\",\"claim\":{\"source\":\"expectation\",\"statement\":\"same direction\"},\"evidence\":{\"source\":\"Figure 1 subplot (c)\",\"statement\":\"reversed direction\"}}",
            "{\"letter\":\"D\",\"attribute\":\"subplots\",\"claim\":{\"source\":\"expectation\",\"statement\":\"different\"},\"evidence\":{\"source\":\"Figure 1 subplots (a) and (b)\",\"statement\":\"identical\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"diagrams\",\"target\":\"figure_1a,figure_1b,figure_1c\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"show distinct types\",\"reason\":\"identical\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"line\",\"target\":\"figure_1a,figure_1b\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"show consistent style\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"arrow\",\"target\":\"figure_1c\",\"other_involved\":\"figure_1a,figure_1b\",\"action\":\"modify\",\"edit_statement\":\"reverse direction\",\"reason\":\"reversed\"}",
            "{\"letter\":\"D\",\"attribute\":\"subplots\",\"target\":\"figure_1a,figure_1b\",\"other_involved\":\"figure_1c\",\"action\":\"modify\",\"edit_statement\":\"show distinct diagrams\",\"reason\":\"identical\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "aXSxSu3fvg": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "aXSxSu3fvg_5_3b584111",
          "bbox": {
            "x": 0.38958331516810824,
            "y": 0.5732567096578663,
            "width": 0.22023809523809523,
            "height": 0.14022988505747128
          }
        },
        {
          "type": "text",
          "page": 6,
          "content": "Figure 3 displays the cross-entropy loss values of the training set and validation set across iterations.\nBoth losses initially decrease rapidly, indicating effective learning and sustained improvement. As\ntraining progresses, these losses gradually stabilize and converge to each other. The criterion for\ndetermining the optimal number of iterations is the point at which the validation loss is minimized.",
          "line": 293
        }
      ],
      "review_text": "Figures 2, 3, 4: The validation loss does not go up, which contradicts the common behavior in most network training cases where validation loss typically increases.",
      "category": "figure-text",
      "description": "The text talks about the finding the minimum of the validation loss, but the figure does not show an increase of the validation loss which is atypical and might not indicate the optimal point of training",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Figure 3 displays the cross-entropy loss values of the training set and validation set across iterations.\nBoth losses initially decrease rapidly, indicating effective learning and sustained improvement. As\ntraining progresses, these losses gradually stabilize and converge to each other. The criterion for\ndetermining the optimal number of iterations is the point at which the validation loss is minimized.",
          "correct": "aXSxSu3fvg_5_3b584111",
          "incorrect": [
            "aXSxSu3fvg_4_image_figure2",
            "aXSxSu3fvg_4_image_figure4",
            "aXSxSu3fvg_2_image_figure1"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"optimal iterations\",\"claim\":{\"source\":\"expectation\",\"statement\":\"validation loss should increase\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"validation loss stabilizes\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"loss decrease\",\"claim\":{\"source\":\"text\",\"statement\":\"both losses decrease\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"only training loss decreases\"}}",
            "{\"letter\":\"C\",\"attribute\":\"loss behavior\",\"claim\":{\"source\":\"text\",\"statement\":\"losses stabilize\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"validation loss increases\"}}",
            "{\"letter\":\"D\",\"attribute\":\"loss focus\",\"claim\":{\"source\":\"expectation\",\"statement\":\"validation loss is primary\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"training loss is highlighted\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"optimal iterations\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"show validation loss increase\",\"reason\":\"unclear\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"losses decrease\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"show validation loss\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"validation loss\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"show validation loss stabilize\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"primary focus\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"highlight validation loss\",\"reason\":\"different\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "aVyJwS1fqQ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "aVyJwS1fqQ_8_d9780559",
          "bbox": {
            "x": 0.6247023627871559,
            "y": 0.5247509485003592,
            "width": 0.20833333333333334,
            "height": 0.14022988505747128
          }
        }
      ],
      "review_text": "Table 3: The values for PSNR and SSIM are scaled by 100, which is not the typical scale for these metrics (0 to 1). This inconsistency could cause confusion for readers.",
      "category": "table-only",
      "description": "The PSNR and SSIM are in an unusual scale (they should be in the range of [0, 1]",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"metric scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"usual scale\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"unusual scale\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"asterisk symbol\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent usage\"},\"evidence\":{\"source\":\"RoboNet column\",\"statement\":\"inconsistent usage\"}}",
            "{\"letter\":\"D\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent bolding\"},\"evidence\":{\"source\":\"Mani-WM values\",\"statement\":\"inconsistent bolding\"}}",
            "{\"letter\":\"A\",\"attribute\":\"metric interpretation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"higher is better\"},\"evidence\":{\"source\":\"table\",\"statement\":\"higher is better\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"PSNR and SSIM values\",\"target\":\"table_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"scale\",\"reason\":\"unusual\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"asterisk symbol\",\"target\":\"table_3\",\"other_involved\":\"RoboNet column\",\"action\":\"modify\",\"edit_statement\":\"consistent use\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"Mani-WM values\",\"target\":\"table_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"bolding\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"PSNR and SSIM values\",\"target\":\"table_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"interpretation\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3"]
    }
  ],
  "a69zct3BkY": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "a69zct3BkY_6_702d412d",
          "bbox": {
            "x": 0.16636902945382254,
            "y": 0.08888893565912358,
            "width": 0.6755952380952381,
            "height": 0.25287356321839083
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "Figure 3 (right) provides a visualization of representations for the subject \u2019Delphine de Girardin\u2019\nafter three types of perturbations, reduced to two dimensions using Principal Component Analysis\n(PCA). ",
          "line": 352
        }
      ],
      "review_text": "O23: Line 352, the example in the text (Delphine de Girardin) does not match the example in Figure 3 (Slovenia).",
      "category": "figure-text",
      "description": "The text says Figure 3 depicts 'Delphine de Girardin', but the caption implies 'Slovenia'",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Figure 3 (right) provides a visualization of representations for the subject \u2019Delphine de Girardin\u2019\nafter three types of perturbations, reduced to two dimensions using Principal Component Analysis\n(PCA). ",
          "correct": "a69zct3BkY_6_702d412d",
          "incorrect": [
            "a69zct3BkY_6_image_figure4",
            "a69zct3BkY_7_image_figure6",
            "a69zct3BkY_7_image_figure5"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"subject\",\"claim\":{\"source\":\"text\",\"statement\":\"Delphine de Girardin\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"Slovenia\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"type\",\"claim\":{\"source\":\"label\",\"statement\":\"PCA\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"histogram\"}}",
            "{\"letter\":\"C\",\"attribute\":\"method\",\"claim\":{\"source\":\"caption\",\"statement\":\"LLaMa2-7B\"},\"evidence\":{\"source\":\"text\",\"statement\":\"PCA\"}}",
            "{\"letter\":\"D\",\"attribute\":\"type\",\"claim\":{\"source\":\"text and caption\",\"statement\":\"not consistent\"},\"evidence\":{\"source\":\"text and caption\",\"statement\":\"not consistent\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"label\",\"target\":\"figure_3_right\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update author name\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"content\",\"target\":\"figure_3_right\",\"other_involved\":\"label\",\"action\":\"replace\",\"edit_statement\":\"PCA visualization\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"method\",\"target\":\"figure_3_right\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"update method\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"subject\",\"target\":\"figure_3_right\",\"other_involved\":\"text, caption\",\"action\":\"modify\",\"edit_statement\":\"align subject, visualization\",\"reason\":\"different\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "Zp51wHvoot": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "Zp51wHvoot_7_f977b271",
          "bbox": {
            "x": 0.16636902945382254,
            "y": 0.09268195492097701,
            "width": 0.6666666666666666,
            "height": 0.4413793103448276
          }
        }
      ],
      "review_text": "Figure 4: The paper only provides several frames of different text prompts, which is unclear whether these scenes transition smoothly. This contradicts the claim that ACDC can assure the temporal consistency of adjacent video clips.",
      "category": "figure-caption",
      "description": "The caption only treats 3 frames, but each method shows 5 frames in the plot",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"frame count\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent frame count\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"inconsistent frame count\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"frame count\",\"claim\":{\"source\":\"caption\",\"statement\":\"4 frames\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"3 frames\"}}",
            "{\"letter\":\"A\",\"attribute\":\"image content\",\"claim\":{\"source\":\"caption\",\"statement\":\"golden retriever\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"no golden retriever\"}}",
            "{\"letter\":\"D\",\"attribute\":\"action\",\"claim\":{\"source\":\"expectation\",\"statement\":\"visible action\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"no action visible\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"number_of_frames\",\"target\":\"caption\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"add frame descriptions\",\"reason\":\"incomplete\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"number_of_frames\",\"target\":\"figure_4\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"add frame output\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"A\",\"attribute\":\"image_content\",\"target\":\"figure_4\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"add golden retriever\",\"reason\":\"omission\"}",
            "{\"letter\":\"D\",\"attribute\":\"specific_action\",\"target\":\"figure_4\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"chasing butterfly\",\"reason\":\"missing\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "ZaudLwn0Hm": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "ZaudLwn0Hm_6_bb41b700",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.09122605597835848,
            "width": 0.6636904761904762,
            "height": 0.20919540229885059
          }
        },
        {
          "type": "image",
          "page": 7,
          "image_id": "ZaudLwn0Hm_7_5fff34ff",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.09536399402837645,
            "width": 0.6696428571428571,
            "height": 0.36551724137931035
          }
        }
      ],
      "review_text": "Figure 3: The figure does not showcase the high-performing models from Table 1.",
      "category": "figure-figure",
      "description": "The figure does not use the highest scoring models from the table for comparison of few-shot performance",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first figure consistent with the content of the second figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first figure inconsistent with the content of the second figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "ZaudLwn0Hm_6_bb41b700",
          "correct": "ZaudLwn0Hm_7_5fff34ff",
          "incorrect": [
            "ZaudLwn0Hm_3_image_figure2",
            "ZaudLwn0Hm_1_image_figure1",
            "ZaudLwn0Hm_6_table_table2"
          ],
          "letters": ["B", "C", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"models included\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should include all SOTA methods\"},\"evidence\":{\"source\":\"Table 1 and Figure 3\",\"statement\":\"missing SOTA methods\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"number of tasks\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be the same\"},\"evidence\":{\"source\":\"Table 1 and Figure 3\",\"statement\":\"different task count\"}}",
            "{\"letter\":\"A\",\"attribute\":\"average performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1 and Figure 3\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"D\",\"attribute\":\"16-shot values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be marked\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"missing 16-shot markers\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"models\",\"target\":\"figure_3\",\"other_involved\":\"table_1\",\"action\":\"add\",\"edit_statement\":\"add missing models\",\"reason\":\"omitted\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"classification tasks\",\"target\":\"figure_3\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align task count\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"performance values\",\"target\":\"table_1\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"symbols\",\"target\":\"figure_3\",\"other_involved\":\"table_1\",\"action\":\"add\",\"edit_statement\":\"add missing symbols\",\"reason\":\"missing\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1", "Figure 3"]
    }
  ],
  "ZYUR3HVSAT": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "ZYUR3HVSAT_7_795b62a5",
          "bbox": {
            "x": 0.3836309342157273,
            "y": 0.2645210704584231,
            "width": 0.5029761904761905,
            "height": 0.40229885057471265
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "A notable advantage of ISARA lies in its capacity for domain generalization within alignment tasks. To empirically evaluate this aspect, we conducted experiments where ISARA was trained and tested across varying categories. The results are illustrated in Figure 2.",
          "line": 324
        }
      ],
      "review_text": "Figure 2: The label 'ISARIL' is incorrect. It should be 'ISARA'.",
      "category": "figure-text",
      "description": "The figure labels have a typo, where ISARIL should be ISARA.",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "A notable advantage of ISARA lies in its capacity for domain generalization within alignment tasks. To empirically evaluate this aspect, we conducted experiments where ISARA was trained and tested across varying categories. The results are illustrated in Figure 2.",
          "correct": "ZYUR3HVSAT_7_795b62a5",
          "incorrect": [
            "ZYUR3HVSAT_7_table_figure4",
            "ZYUR3HVSAT_7_table_figure3",
            "ZYUR3HVSAT_8_image_figure5"
          ],
          "letters": ["D", "C", "B", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"method name\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"ISARIL\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"methods\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be relevant\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"SFT and ICL-kNN\"}}",
            "{\"letter\":\"D\",\"attribute\":\"x-axis label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"text\",\"statement\":\"varying categories\"}}",
            "{\"letter\":\"A\",\"attribute\":\"ISARA\",\"claim\":{\"source\":\"text\",\"statement\":\"domain generalization\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"decreasing trend\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"method name\",\"target\":\"Figure_2_legend\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"change 'ISARIL' to 'ISARA'\",\"reason\":\"typographical error\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"evaluated methods\",\"target\":\"text\",\"other_involved\":\"Figure_2\",\"action\":\"add\",\"edit_statement\":\"mention SFT, ICL-kNN\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"x-axis label\",\"target\":\"Figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align with text\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"A\",\"attribute\":\"performance trend\",\"target\":\"Figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align description\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "ZT33ACedmn": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "ZT33ACedmn_6_b49ab038",
          "bbox": {
            "x": 0.1961309342157273,
            "y": 0.09716474160380748,
            "width": 0.6160714285714286,
            "height": 0.2206896551724138
          }
        }
      ],
      "review_text": "Figure 3: What is the final approach that you choose? I assume the right panel. If this is the case, I'd add a more verbose caption to the Figure to clarify this.",
      "category": "figure-only",
      "description": "The figure shows two similar, but separate frameworks. It is unclear which one was picked for the paper",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"model frameworks\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should clarify main framework\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"does not clarify main framework\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"arrows from Tokenizer\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"are different\"}}",
            "{\"letter\":\"B\",\"attribute\":\"ABBA Decompression\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"is different\"}}",
            "{\"letter\":\"D\",\"attribute\":\"input data\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be the same\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"is different\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"model frameworks\",\"target\":\"figure_3\",\"other_involved\":null,\"action\":\"clarify\",\"edit_statement\":\"main subject\",\"reason\":\"unclear\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"arrows\",\"target\":\"figure_3\",\"other_involved\":\"Tokenizer, QLoRa\",\"action\":\"modify\",\"edit_statement\":\"number\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"ABBA Decompression\",\"target\":\"figure_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"inconsistency\",\"reason\":\"incompatible\"}",
            "{\"letter\":\"D\",\"attribute\":\"input data\",\"target\":\"figure_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"consistency\",\"reason\":\"different\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 3"]
    }
  ],
  "ZMtq9pYw5e": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "ZMtq9pYw5e_8_2e8d8376",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.22773946126302083,
            "width": 0.6636904761904762,
            "height": 0.2689655172413793
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "We conduct our experiments on the graph reasoning tasks proposed in GraphInstruct (Chen et al., 2024a). This dataset contains nine graph reasoning problems with different time complexity, ranging from linear and polynomial complexity to NP-complete.",
          "line": 324
        }
      ],
      "review_text": "Table 3: The paper mentions that GraphInstruct dataset has 9 problem types, including maximum flow, hamilton path, and subgraph matching. However, experimental results are only provided for 6 types of problems, contradicting the claim of solving complex graph reasoning problems.",
      "category": "table-text",
      "description": "The dataset contains 9 classes, but the results are only shown for 6 of them",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "We conduct our experiments on the graph reasoning tasks proposed in GraphInstruct (Chen et al., 2024a). This dataset contains nine graph reasoning problems with different time complexity, ranging from linear and polynomial complexity to NP-complete.",
          "correct": "ZMtq9pYw5e_8_2e8d8376",
          "incorrect": [
            "ZMtq9pYw5e_7_table_table2",
            "ZMtq9pYw5e_17_table_table3",
            "ZMtq9pYw5e_6_image_figure4"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"graph reasoning tasks\",\"claim\":{\"source\":\"text\",\"statement\":\"9 tasks\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"6 tasks\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"graph reasoning tasks\",\"claim\":{\"source\":\"text excerpt\",\"statement\":\"fewer tasks\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"more tasks\"}}",
            "{\"letter\":\"B\",\"attribute\":\"graph reasoning tasks\",\"claim\":{\"source\":\"text\",\"statement\":\"9 tasks\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"7 tasks\"}}",
            "{\"letter\":\"C\",\"attribute\":\"task classification\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent classification\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"inconsistent classification\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"tasks\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align performance results\",\"reason\":\"six instead of nine\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"tasks\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align task count\",\"reason\":\"more\"}",
            "{\"letter\":\"B\",\"attribute\":\"tasks\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align performance results\",\"reason\":\"seven instead of nine\"}",
            "{\"letter\":\"C\",\"attribute\":\"title and sections\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align task categories\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "YryL3QIWWc": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "YryL3QIWWc_8_22f84f62",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.09681992804867098,
            "width": 0.6726190476190476,
            "height": 0.2597701149425287
          }
        }
      ],
      "review_text": "Figure 3, Figure 4, Figure 5, Figure 8, Figure 9: Incorrect labeling of either y or x-axis.",
      "category": "figure-only",
      "description": "The headline says Delta1 Error vs Number of Forward Passes, but the axis labels show Relative Error vs MACs",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"x-axis label\",\"claim\":{\"source\":\"caption\",\"statement\":\"Number of Forward Passes\"},\"evidence\":{\"source\":\"Figure 9\",\"statement\":\"MACs\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"y-axis label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent\"},\"evidence\":{\"source\":\"Figure 9\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"D\",\"attribute\":\"legend models\",\"claim\":{\"source\":\"text\",\"statement\":\"aN with N\u2208[1,2,5,10,15,20]\"},\"evidence\":{\"source\":\"Figure 9\",\"statement\":\"shows a2, a4, a6\"}}",
            "{\"letter\":\"B\",\"attribute\":\"legend color\",\"claim\":{\"source\":\"expectation\",\"statement\":\"match graph\"},\"evidence\":{\"source\":\"Figure 9\",\"statement\":\"does not match\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"x-axis label\",\"target\":\"figure_9a\",\"other_involved\":\"figure_9b,figure_9 title\",\"action\":\"modify\",\"edit_statement\":\"refer forward passes\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"y-axis label\",\"target\":\"figure_9a\",\"other_involved\":\"figure_9b,figure_9 title\",\"action\":\"modify\",\"edit_statement\":\"update\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"legend\",\"target\":\"figure_9a\",\"other_involved\":\"figure_9b,paper text\",\"action\":\"modify\",\"edit_statement\":\"reflect cited models\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"B\",\"attribute\":\"legend color\",\"target\":\"figure_9a\",\"other_involved\":\"figure_9b\",\"action\":\"modify\",\"edit_statement\":\"match plot colors\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 9"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "YryL3QIWWc_6_f0038e6e",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.27992336229346265,
            "width": 0.6726190476190476,
            "height": 0.2988505747126437
          }
        }
      ],
      "review_text": "Figure 6: Legend is incorrect. Caption compares results of upcycling with a5 and a6, which are not included in the plot.",
      "category": "figure-caption",
      "description": "Caption mentions comparison with models A5 and A6, but they are not present in the figure",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"models\",\"claim\":{\"source\":\"caption\",\"statement\":\"compare A5 A6\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"A5 A6 not listed\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"x-axis\",\"claim\":{\"source\":\"caption\",\"statement\":\"15K iterations\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"MACs not iterations\"}}",
            "{\"letter\":\"A\",\"attribute\":\"L(C)\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should differ\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"is same\"}}",
            "{\"letter\":\"D\",\"attribute\":\"trend\",\"claim\":{\"source\":\"caption\",\"statement\":\"clear scaling law\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"no clear trend\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"models A5 and A6\",\"target\":\"figure_6_caption\",\"other_involved\":\"figure_6_legend, figure_6_plot\",\"action\":\"add\",\"edit_statement\":\"add to plot\",\"reason\":\"not present\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"iteration count\",\"target\":\"figure_6_caption\",\"other_involved\":\"figure_6\",\"action\":\"modify\",\"edit_statement\":\"add iteration x-axis\",\"reason\":\"missing info\"}",
            "{\"letter\":\"A\",\"attribute\":\"L(C) value\",\"target\":\"figure_6\",\"other_involved\":\"figure_6\",\"action\":\"modify\",\"edit_statement\":\"update value\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"scaling law\",\"target\":\"figure_6_caption\",\"other_involved\":\"figure_6_data\",\"action\":\"modify\",\"edit_statement\":\"rephrase description\",\"reason\":\"no trend\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "YQjdNC0NkW": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "YQjdNC0NkW_5_b5e64f6b",
          "bbox": {
            "x": 0.17529760088239396,
            "y": 0.45383146439475575,
            "width": 0.6607142857142857,
            "height": 0.14022988505747128
          }
        },
        {
          "type": "image",
          "page": 6,
          "image_id": "YQjdNC0NkW_6_88c21d25",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.09314175309806035,
            "width": 0.6666666666666666,
            "height": 0.14482758620689656
          }
        }
      ],
      "review_text": "Table 1 & Table 2: For the same model and CLIP4CLIP encoder, the AV-Align metrics is reported as 0.243 in Table 1 but 0.225 in Table 2, while other metrics remain the same.",
      "category": "table-table",
      "description": "The results for Clip4Clip are the same across the two tables except for the AV-Align",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "YQjdNC0NkW_5_b5e64f6b",
          "correct": "YQjdNC0NkW_6_88c21d25",
          "incorrect": [
            "YQjdNC0NkW_6_table_table4",
            "YQjdNC0NkW_6_table_table3",
            "YQjdNC0NkW_7_table_table5"
          ],
          "letters": ["B", "D", "C", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"AV-Align score\",\"claim\":{\"source\":\"table_1_and_table_2\",\"statement\":\"is different\"},\"evidence\":{\"source\":\"table_1_and_table_2\",\"statement\":\"other metrics remain consistent\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"FAD and IS scores\",\"claim\":{\"source\":\"table_1_and_table_2\",\"statement\":\"are inconsistent\"},\"evidence\":{\"source\":\"table_1_and_table_2\",\"statement\":\"AV-Align remains the same\"}}",
            "{\"letter\":\"B\",\"attribute\":\"performance metrics\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should vary\"},\"evidence\":{\"source\":\"table_1_and_table_2\",\"statement\":\"are identical\"}}",
            "{\"letter\":\"D\",\"attribute\":\"CLAP and CAVP scores\",\"claim\":{\"source\":\"table_1_and_table_2\",\"statement\":\"vary\"},\"evidence\":{\"source\":\"table_1_and_table_2\",\"statement\":\"AV-Align is consistent\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"AV-Align score\",\"target\":\"Table_1\",\"other_involved\":\"Table_2\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"FAD and IS scores\",\"target\":\"Table_1\",\"other_involved\":\"Table_2\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"performance metrics\",\"target\":\"Table_1, Table_2\",\"other_involved\":null,\"action\":\"remove\",\"edit_statement\":\"inconsistency\",\"reason\":\"not present\"}",
            "{\"letter\":\"D\",\"attribute\":\"CLAP and CAVP scores\",\"target\":\"Table_1, Table_2\",\"other_involved\":null,\"action\":\"remove\",\"edit_statement\":\"inconsistency\",\"reason\":\"not present\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1", "Table 2"]
    }
  ],
  "Y89o3LAEHX": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "Y89o3LAEHX_3_e9e873ce",
          "bbox": {
            "x": 0.16636902945382254,
            "y": 0.25601533034752155,
            "width": 0.6666666666666666,
            "height": 0.2482758620689655
          }
        },
        {
          "type": "text",
          "page": 2,
          "content": "For datasets, our experiments were conducted on four commonly used benchmark datasets: ETTh1, ETTh2 from ETTh (Zhou et al., 2021a), and ETTm1, ETTm2 from ETTm (Zhou et al., 2021b). All datasets are split into training, validation, and testing sets with the 7:1:2 ratio.",
          "line": 100
        }
      ],
      "review_text": "Figures 1 and 2: These figures represent only three datasets out of the total used in the study. The authors should explain why this subset was chosen.",
      "category": "figure-text",
      "description": "The text states 4 datasets used, but the figure only shows results for three",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "For datasets, our experiments were conducted on four commonly used benchmark datasets: ETTh1, ETTh2 from ETTh (Zhou et al., 2021a), and ETTm1, ETTm2 from ETTm (Zhou et al., 2021b). All datasets are split into training, validation, and testing sets with the 7:1:2 ratio.",
          "correct": "Y89o3LAEHX_3_e9e873ce",
          "incorrect": [
            "Y89o3LAEHX_4_table_table2",
            "Y89o3LAEHX_5_table_table3",
            "Y89o3LAEHX_6_table_table4"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"number of datasets\",\"claim\":{\"source\":\"text\",\"statement\":\"four datasets\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"three datasets\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"number of datasets\",\"claim\":{\"source\":\"text\",\"statement\":\"has amount n \"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"shows more than n\"}}",
            "{\"letter\":\"B\",\"attribute\":\"number of datasets\",\"claim\":{\"source\":\"text\",\"statement\":\"three datasets\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"four datasets\"}}",
            "{\"letter\":\"D\",\"attribute\":\"dataset names\",\"claim\":{\"source\":\"text\",\"statement\":\"ETTh1, ETTh2, ETTm1, ETTm2\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"different names\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"datasets\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"missing dataset results\",\"reason\":\"unspecified\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"datasets\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"add\",\"edit_statement\":\"missing dataset names\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"B\",\"attribute\":\"datasets\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"update dataset count\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"D\",\"attribute\":\"dataset names\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"replace\",\"edit_statement\":\"dataset names\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "Y0P6cOZzNm": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "Y0P6cOZzNm_7_29d243a8",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.22704979907507183,
            "width": 0.6755952380952381,
            "height": 0.14942528735632185
          }
        }
      ],
      "review_text": "Other concerns and questions: - In the Stable Diffusion experiments, were seeds fixed between the baseline and your method\u2019s results? The outputs do not appear to come from the same seeds (in contrast to the results in Fig. 5 where they clearly come from the same seed). But I may be wrong. If the results do not come from the same seeds, the authors should revise the figure to display results with fixed seeds, otherwise this may be a very unfair cherry picking.",
      "category": "figure-only",
      "description": "There is a watermark \"shutterstock\" in one of the finetuned images, which raises the question if the image was actually generated",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"watermark\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't be present\"},\"evidence\":{\"source\":\"figure_5\",\"statement\":\"present after finetuning\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"indoor scenes\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be present\"},\"evidence\":{\"source\":\"figure_5\",\"statement\":\"absent before finetuning\"}}",
            "{\"letter\":\"C\",\"attribute\":\"noise and blur\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't increase\"},\"evidence\":{\"source\":\"figure_5\",\"statement\":\"increased after finetuning\"}}",
            "{\"letter\":\"D\",\"attribute\":\"labels\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't be swapped\"},\"evidence\":{\"source\":\"figure_5\",\"statement\":\"swapped\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"watermark\",\"target\":\"figure_5\",\"other_involved\":null,\"action\":\"remove\",\"edit_statement\":\"remove watermark\",\"reason\":\"presence\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"indoor scenes\",\"target\":\"figure_5\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"add indoor scenes\",\"reason\":\"lacking\"}",
            "{\"letter\":\"C\",\"attribute\":\"image noise and blur\",\"target\":\"figure_5\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"reduce noise and blur\",\"reason\":\"increase\"}",
            "{\"letter\":\"D\",\"attribute\":\"text labels\",\"target\":\"figure_5\",\"other_involved\":null,\"action\":\"replace\",\"edit_statement\":\"swap labels\",\"reason\":\"swapped\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 5"]
    }
  ],
  "Xq12wsoNux": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "Xq12wsoNux_6_e5ab5acb",
          "bbox": {
            "x": 0.23779760088239396,
            "y": 0.6641762919809626,
            "width": 0.5982142857142857,
            "height": 0.03908045977011495
          }
        },
        {
          "type": "text",
          "page": 6,
          "content": "The time efficiency of DP-ZeRO consists of two parts: the local computation (including forward and\nbackward propagation) and the global communication (including intra-node and inter-node commu-\nnication). Given that the only difference between DP-ZeRO and ZeRO is the back-propagation,\nwe claim that DP-ZeRO could enjoy high efficiency on-par with the standard ZeRO when (I) DP\nback-propagation exhibits a time efficiency comparable to the standard, similar to the single GPU\ntraining, and/or (II) the time efficiency of the parts other than back-propagation is not insignificant.\nWe give the time of each part of DP-ZeRO in equation 3 to illustrate our claim.",
          "line": 301
        }
      ],
      "review_text": "Equation (3) lines 308-309: It seems that the numerator and denominator of the fraction are reversed.",
      "category": "equation-text",
      "description": "The numerator and denominator are flipped",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the equation consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the equation inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "The time efficiency of DP-ZeRO consists of two parts: the local computation (including forward and\nbackward propagation) and the global communication (including intra-node and inter-node commu-\nnication). Given that the only difference between DP-ZeRO and ZeRO is the back-propagation,\nwe claim that DP-ZeRO could enjoy high efficiency on-par with the standard ZeRO when (I) DP\nback-propagation exhibits a time efficiency comparable to the standard, similar to the single GPU\ntraining, and/or (II) the time efficiency of the parts other than back-propagation is not insignificant.\nWe give the time of each part of DP-ZeRO in equation 3 to illustrate our claim.",
          "correct": "Xq12wsoNux_6_e5ab5acb",
          "incorrect": [
            "Xq12wsoNux_5_interline-equation_equation38",
            "Xq12wsoNux_4_interline-equation_equation42",
            "Xq12wsoNux_4_interline-equation_equation28"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"equation terms\",\"claim\":{\"source\":\"expectation\",\"statement\":\"terms should be consistent\"},\"evidence\":{\"source\":\"(3)\",\"statement\":\"terms are interchanged\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"equation terms\",\"claim\":{\"source\":\"text\",\"statement\":\"should not include forward prop and communication\"},\"evidence\":{\"source\":\"(3)\",\"statement\":\"includes forward prop and communication\"}}",
            "{\"letter\":\"B\",\"attribute\":\"speed calculation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should include intra-node and inter-node terms\"},\"evidence\":{\"source\":\"(3)\",\"statement\":\"only general term\"}}",
            "{\"letter\":\"C\",\"attribute\":\"speed ratio\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should have backward prop in numerator\"},\"evidence\":{\"source\":\"(3)\",\"statement\":\"has different terms\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"right side terms\",\"target\":\"equation_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"exchange numerator denominator\",\"reason\":\"interchanged\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"terms\",\"target\":\"equation_3\",\"other_involved\":\"text\",\"action\":\"remove\",\"edit_statement\":\"forward communication terms\",\"reason\":\"unnecessary\"}",
            "{\"letter\":\"B\",\"attribute\":\"equation terms\",\"target\":\"equation_3\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"detail communication terms\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"C\",\"attribute\":\"left side terms\",\"target\":\"equation_3\",\"other_involved\":null,\"action\":\"reposition\",\"edit_statement\":\"exchange numerator denominator\",\"reason\":\"interchanged\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["(3)"]
    }
  ],
  "XWPp9FJ0uJ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "XWPp9FJ0uJ_5_5a6bb95f",
          "bbox": {
            "x": 0.4401785532633464,
            "y": 0.7187739931303879,
            "width": 0.39285714285714285,
            "height": 0.02528735632183908
          }
        },
        {
          "type": "image",
          "page": 5,
          "image_id": "XWPp9FJ0uJ_5_75496f3b",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.09463606209590518,
            "width": 0.6696428571428571,
            "height": 0.22298850574712642
          }
        }
      ],
      "review_text": "Equation 3: The order of the combined embedding starts with z, while in Figure 1, it starts with z_cls.",
      "category": "figure-equation",
      "description": "The equation starts with the vector z and finally the z_cls, but in the figure, it starts with z_cls and then vector z",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the equation?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the equation?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "XWPp9FJ0uJ_5_5a6bb95f",
          "correct": "XWPp9FJ0uJ_5_75496f3b",
          "incorrect": [
            "XWPp9FJ0uJ_6_image_figure2",
            "XWPp9FJ0uJ_9_image_figure4",
            "XWPp9FJ0uJ_15_image_figure5"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"input order\",\"claim\":{\"source\":\"equation_3\",\"statement\":\"z before z_cls\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"z_cls before z\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"z components\",\"claim\":{\"source\":\"figure_1\",\"statement\":\"multiple z components\"},\"evidence\":{\"source\":\"equation_3\",\"statement\":\"single z\"}}",
            "{\"letter\":\"C\",\"attribute\":\"f_FI correspondence\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be clear\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"unclear\"}}",
            "{\"letter\":\"D\",\"attribute\":\"h output\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be final output\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"not final output\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"input order\",\"target\":\"equation_3\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"align input order\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"components of z\",\"target\":\"equation_3\",\"other_involved\":\"figure_1\",\"action\":\"add\",\"edit_statement\":\"add z components\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"function label f_FI\",\"target\":\"figure_1\",\"other_involved\":\"equation_3\",\"action\":\"add\",\"edit_statement\":\"add function label\",\"reason\":\"unclear\"}",
            "{\"letter\":\"D\",\"attribute\":\"output h\",\"target\":\"figure_1\",\"other_involved\":\"equation_3\",\"action\":\"modify\",\"edit_statement\":\"align output representation\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["(3)", "Figure 1"]
    }
  ],
  "Wl5HGuFYVp": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "Wl5HGuFYVp_7_cf232e8e",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.4861302869073276,
            "width": 0.6666666666666666,
            "height": 0.1425287356321839
          }
        }
      ],
      "review_text": "Table 7: There are two 'Method without our method' entries, which is inconsistent and unclear.",
      "category": "table-only",
      "description": "The table shows two times method without our method, while the second time it should be \"method with our method\"",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"header\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be 'Method with our method'\"},\"evidence\":{\"source\":\"Table 7\",\"statement\":\"'Method without our method' appears twice\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"recovery results\",\"claim\":{\"source\":\"caption\",\"statement\":\"smaller the better\"},\"evidence\":{\"source\":\"Table 7\",\"statement\":\"highlighted results are larger\"}}",
            "{\"letter\":\"D\",\"attribute\":\"visual dividers\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should have dividers\"},\"evidence\":{\"source\":\"Table 7\",\"statement\":\"lacks dividers\"}}",
            "{\"letter\":\"A\",\"attribute\":\"datasets\",\"claim\":{\"source\":\"caption\",\"statement\":\"two real-world datasets\"},\"evidence\":{\"source\":\"table\",\"statement\":\"synthetic datasets\"}}"
          ],
          "letters": ["B", "C", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"header 'Method without our method'\",\"target\":\"table_7_header\",\"other_involved\":null,\"action\":\"replace\",\"edit_statement\":\"header 'Method with our method'\",\"reason\":\"comparison missing\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"recovery results\",\"target\":\"table_7\",\"other_involved\":\"table_7_caption\",\"action\":\"modify\",\"edit_statement\":\"highlight results\",\"reason\":\"best value mismatch\"}",
            "{\"letter\":\"D\",\"attribute\":\"dividers\",\"target\":\"table_7\",\"other_involved\":\"Chebyshev dataset, Clark dataset\",\"action\":\"add\",\"edit_statement\":\"dataset dividers\",\"reason\":\"distinguish datasets\"}",
            "{\"letter\":\"A\",\"attribute\":\"datasets\",\"target\":\"table_7_caption\",\"other_involved\":\"table_7\",\"action\":\"modify\",\"edit_statement\":\"update dataset description\",\"reason\":\"synthetic-real mismatch\"}"
          ],
          "letters": ["B", "C", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 7"]
    }
  ],
  "VfvxZLXYgd": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "VfvxZLXYgd_10_cf30813a",
          "bbox": {
            "x": 0.5175595056442986,
            "y": 0.41302686669360633,
            "width": 0.32142857142857145,
            "height": 0.2045977011494253
          }
        },
        {
          "type": "text",
          "page": 10,
          "content": "Fig. 8 demonstrates the computational efficiency in\nterms of the amount of GPU memory used and up-\ndate time in each step, respectively. Firstly, POGM\nuses much less GPU memory than Fishr and ERM.",
          "line": -1
        }
      ],
      "review_text": "Figure 8: The figure shows that POGM uses more GPU memory than ERM, which contradicts the text stating that POGM uses less GPU memory than ERM.",
      "category": "figure-text",
      "description": "Text says POGM uses less GPU memory than ERM, but figure shows otherwise",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Fig. 8 demonstrates the computational efficiency in\nterms of the amount of GPU memory used and up-\ndate time in each step, respectively. Firstly, POGM\nuses much less GPU memory than Fishr and ERM.",
          "correct": "VfvxZLXYgd_10_cf30813a",
          "incorrect": [
            "VfvxZLXYgd_8_image_figure7",
            "VfvxZLXYgd_8_image_figure6",
            "VfvxZLXYgd_8_image_figure5"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"GPU memory usage comparison between POGM and ERM\",\"claim\":{\"source\":\"text\",\"statement\":\"POGM uses less GPU memory than ERM\"},\"evidence\":{\"source\":\"Figure 8\",\"statement\":\"POGM uses more GPU memory than ERM\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"GPU memory usage comparison between POGM and Fishr\",\"claim\":{\"source\":\"text\",\"statement\":\"POGM uses less GPU memory than Fishr\"},\"evidence\":{\"source\":\"Figure 8\",\"statement\":\"POGM uses more GPU memory than Fishr\"}}",
            "{\"letter\":\"C\",\"attribute\":\"time per iteration comparison\",\"claim\":{\"source\":\"expectation\",\"statement\":\"POGM is computationally efficient\"},\"evidence\":{\"source\":\"Figure 8\",\"statement\":\"ERM has lowest time per iteration\"}}",
            "{\"letter\":\"D\",\"attribute\":\"GPU memory usage comparison between POGM and Fishr\",\"claim\":{\"source\":\"text\",\"statement\":\"POGM uses less GPU memory than Fishr\"},\"evidence\":{\"source\":\"expectation\",\"statement\":\"Fish uses less GPU memory than POGM\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"GPU memory usage\",\"target\":\"text\",\"other_involved\":\"Figure_8\",\"action\":\"modify\",\"edit_statement\":\"align GPU memory\",\"reason\":\"contradict\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"GPU memory usage\",\"target\":\"text\",\"other_involved\":\"Figure_8\",\"action\":\"modify\",\"edit_statement\":\"align GPU memory\",\"reason\":\"contradict\"}",
            "{\"letter\":\"C\",\"attribute\":\"time per iteration\",\"target\":\"text\",\"other_involved\":\"Figure_8\",\"action\":\"modify\",\"edit_statement\":\"align computational efficiency\",\"reason\":\"contradict\"}",
            "{\"letter\":\"D\",\"attribute\":\"GPU memory usage\",\"target\":\"text\",\"other_involved\":\"POGM, Fish\",\"action\":\"modify\",\"edit_statement\":\"align GPU memory\",\"reason\":\"contradict\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 8"]
    }
  ],
  "VSfvQxPPB0": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "VSfvQxPPB0_9_03f47547",
          "bbox": {
            "x": 0.19910712469191777,
            "y": 0.2315325506802263,
            "width": 0.6130952380952381,
            "height": 0.15862068965517243
          }
        }
      ],
      "review_text": "Table 6: The third column shows evaluation of the critic without using the critic, which is a contradiction.",
      "category": "table-only",
      "description": "Critic w/o critic does not make sense",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"column heading\",\"claim\":{\"source\":\"expectation\",\"statement\":\"evaluate critic's performance with critic\"},\"evidence\":{\"source\":\"Table 6\",\"statement\":\"w/o critic column\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"column presence\",\"claim\":{\"source\":\"expectation\",\"statement\":\"not duplicated\"},\"evidence\":{\"source\":\"Table 6\",\"statement\":\"w/o HR duplicated\"}}",
            "{\"letter\":\"D\",\"attribute\":\"SELU results\",\"claim\":{\"source\":\"expectation\",\"statement\":\"removing elements increases performance\"},\"evidence\":{\"source\":\"Table 6\",\"statement\":\"SELU highest results\"}}",
            "{\"letter\":\"B\",\"attribute\":\"metric format\",\"claim\":{\"source\":\"expectation\",\"statement\":\"not percentage\"},\"evidence\":{\"source\":\"Table 6\",\"statement\":\"percentage format\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"column 'w/o critic'\",\"target\":\"table_6\",\"other_involved\":\"'Critic (Success Detection Accuracy)' section\",\"action\":\"remove\",\"edit_statement\":\"superfluous column\",\"reason\":\"contradictory\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"column 'w/o HR'\",\"target\":\"table_6\",\"other_involved\":\"'Critic' and 'Actor' sections\",\"action\":\"remove\",\"edit_statement\":\"duplicate column\",\"reason\":\"redundant\"}",
            "{\"letter\":\"D\",\"attribute\":\"'SELU' results\",\"target\":\"table_6\",\"other_involved\":\"'Critic' and 'Actor' sections\",\"action\":\"modify\",\"edit_statement\":\"update values\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"metrics format\",\"target\":\"table_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"change to values\",\"reason\":\"incorrect\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 6"]
    }
  ],
  "VSVQljJU5N": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "VSVQljJU5N_8_12f6b672",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.23279692551185346,
            "width": 0.6726190476190476,
            "height": 0.2413793103448276
          }
        }
      ],
      "review_text": "Table 1: UltraGCN's R@10 is shown to be higher than UltraGCN's R@20, which is unusual as R@10 should typically be lower than R@20.",
      "category": "table-only",
      "description": "R@20 is lower than R@10 for UltraGCN on Yahoo, which is unusual",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"R@20 value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"higher than R@10\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"lower than R@10\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"table consistency\",\"claim\":{\"source\":\"expectation\",\"statement\":\"show Top-10 and Top-20\"},\"evidence\":{\"source\":\"table\",\"statement\":\"does not show for all\"}}",
            "{\"letter\":\"C\",\"attribute\":\"method name\",\"claim\":{\"source\":\"expectation\",\"statement\":\"implies superiority\"},\"evidence\":{\"source\":\"results\",\"statement\":\"not always superior\"}}",
            "{\"letter\":\"A\",\"attribute\":\"dataset usage\",\"claim\":{\"source\":\"expectation\",\"statement\":\"used for all tests\"},\"evidence\":{\"source\":\"text\",\"statement\":\"not used for R@10 and R@20\"}}"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"R@20 value\",\"target\":\"table_1\",\"other_involved\":\"UltraGCN,Yahoo,R@10 value\",\"action\":\"modify\",\"edit_statement\":\"adjust R@20 value\",\"reason\":\"lower than R@10\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"performance\",\"target\":\"table_1\",\"other_involved\":\"Top-10,Top-20\",\"action\":\"modify\",\"edit_statement\":\"show all measures\",\"reason\":\"not consistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"UltraGCN\",\"target\":\"paper\",\"other_involved\":\"results\",\"action\":\"modify\",\"edit_statement\":\"align claim results\",\"reason\":\"not best\"}",
            "{\"letter\":\"A\",\"attribute\":\"datasets\",\"target\":\"table_1\",\"other_involved\":\"NDCG@10,R@10,R@20\",\"action\":\"modify\",\"edit_statement\":\"align dataset usage\",\"reason\":\"only used\"}"
          ],
          "letters": ["D", "B", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "UwbX8KOZgK": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "UwbX8KOZgK_9_ee7553c9",
          "bbox": {
            "x": 0.16339283897763207,
            "y": 0.22567050977684988,
            "width": 0.6785714285714286,
            "height": 0.11264367816091955
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "Training large vision-language models requires extensive, detailed image-text pairs. Existing web-scraped datasets, however, are noisy and lack detailed image descriptions. To bridge this gap, we introduce PixelProse, a comprehensive dataset of over 16M (million) synthetically generated captions, leveraging cutting-edge vision-language models for detailed and accurate descriptions",
          "line": 11
        }
      ],
      "review_text": "Table 6: The use of only 3M images from the CC12M subset of PixelProse seems inconsistent with the proposed PixelProse-16M dataset, which is supposed to be the main contribution of the paper.",
      "category": "table-text",
      "description": "The main contribution of the paper is a 16M dataset, but it is only evaluated in the table with 3M data points",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Training large vision-language models requires extensive, detailed image-text pairs. Existing web-scraped datasets, however, are noisy and lack detailed image descriptions. To bridge this gap, we introduce PixelProse, a comprehensive dataset of over 16M (million) synthetically generated captions, leveraging cutting-edge vision-language models for detailed and accurate descriptions",
          "correct": "UwbX8KOZgK_9_ee7553c9",
          "incorrect": [
            "UwbX8KOZgK_8_table_table5",
            "UwbX8KOZgK_5_table_table4",
            "UwbX8KOZgK_4_table_table3"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"dataset size\",\"claim\":{\"source\":\"text\",\"statement\":\"16 million data points\"},\"evidence\":{\"source\":\"Table 6\",\"statement\":\"3 million data points\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"dataset purpose\",\"claim\":{\"source\":\"text\",\"statement\":\"pre-training dataset\"},\"evidence\":{\"source\":\"Table 6\",\"statement\":\"evaluated on FineTune Dataset\"}}",
            "{\"letter\":\"A\",\"attribute\":\"dataset accuracy\",\"claim\":{\"source\":\"text\",\"statement\":\"comprehensive and accurate\"},\"evidence\":{\"source\":\"accuracy comparison\",\"statement\":\"lower accuracy\"}}",
            "{\"letter\":\"C\",\"attribute\":\"model training\",\"claim\":{\"source\":\"text\",\"statement\":\"training large vision-language models\"},\"evidence\":{\"source\":\"Table 16\",\"statement\":\"FineTuning non vision-language datasets\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"dataset size\",\"target\":\"table_6\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"add evaluation results\",\"reason\":\"incomplete\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"PixelProse evaluation\",\"target\":\"table_6\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align purpose\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"A\",\"attribute\":\"accuracy\",\"target\":\"text\",\"other_involved\":\"PixelProse Original 3M, PixelProse Ours 3M\",\"action\":\"modify\",\"edit_statement\":\"align claims\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"C\",\"attribute\":\"dataset type\",\"target\":\"table_16\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align consistency\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 6"]
    }
  ],
  "UoYxPYMUWd": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "UoYxPYMUWd_8_6d7ced11",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.5734866087464081,
            "width": 0.6696428571428571,
            "height": 0.271264367816092
          }
        }
      ],
      "review_text": "Table 1: The bold font is used inconsistently to highlight ODAF's performance, as not all results are statistically significant. For example, in the 'walker2d m-r' row, SVR gets a better score than ODAF.",
      "category": "table-only",
      "description": "The bolded numbers do not always highlight the best result",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"bolded value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be highest\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not highest\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"decimal places\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not same\"}}",
            "{\"letter\":\"A\",\"attribute\":\"bolded value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"OSR-10 should be bolded\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"ODAF(Ours) is bolded\"}}",
            "{\"letter\":\"B\",\"attribute\":\"decimal places\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not same\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"bolding\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"change bolding\",\"reason\":\"incorrect\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"decimal places\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"standardize decimal\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"bolding\",\"target\":\"table_1\",\"other_involved\":\"ODAF(Ours), OSR-10\",\"action\":\"modify\",\"edit_statement\":\"change bolding\",\"reason\":\"incorrect\"}",
            "{\"letter\":\"B\",\"attribute\":\"decimal places\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"standardize decimal\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "UeHunlny77": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "UeHunlny77_9_18ab2567",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.09463606209590518,
            "width": 0.6755952380952381,
            "height": 0.4505747126436782
          }
        }
      ],
      "review_text": "Fig.4(a): The claim in line 418 that 'the closed-source LCM (GPT-4o) maintains a relatively stable performance, showing minimal degradation' contradicts the data shown in several subplots, which indicate high degradation.",
      "category": "figure-only",
      "description": "Part (a) should have as x-label some sort of length measure (as it is called L-CiteEval-Length), not hardness (like for L-CiteEval-Hardness)",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"x-axis labels\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should describe length\"},\"evidence\":{\"source\":\"figure_4a\",\"statement\":\"describe hardness\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be complete\"},\"evidence\":{\"source\":\"figure_4a\",\"statement\":\"missing information\"}}",
            "{\"letter\":\"B\",\"attribute\":\"symbols\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"not in every plot\"}}",
            "{\"letter\":\"D\",\"attribute\":\"titles\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match content\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"swapped\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"x-axis labels\",\"target\":\"figure_4a\",\"other_involved\":\"figure_4a title\",\"action\":\"modify\",\"edit_statement\":\"align with title\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"model legend\",\"target\":\"figure_4a\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"missing information\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"B\",\"attribute\":\"legend symbols\",\"target\":\"figure_4\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"missing symbols\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"D\",\"attribute\":\"titles\",\"target\":\"figure_4a\",\"other_involved\":\"figure_4b\",\"action\":\"swap\",\"edit_statement\":\"exchange titles\",\"reason\":\"swapped\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "UEE13WQlNU": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "UEE13WQlNU_4_2f87314c",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.09808433795797414,
            "width": 0.6726190476190476,
            "height": 0.3218390804597701
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "In SSCM, the masking of data propels the learning of nuanced features, while the self-supervised methodology amplifies the model\u2019s robustness in a teacher-student mutual learning method. The teacher network \ud835\udcaf(\u22c5) is frozen during training and is updated via an exponential moving average (EMA Tarvainen & Valpola (2017)) predicated on the current model\u2019s parameters. This process is articulated as follows:\n\n$$\n\\theta_t^{(t+1)} = \\gamma \\theta_t^{(t)} + (1 - \\gamma) \\theta_s^{(t)},\n$$\n\nwhere $\\theta_t$ and $\\theta_s$ represent the parameters of the teacher and student model, respectively, at training step $t$, and $\\gamma$ is the decay term controlling the update momentum.\n",
          "line": 183
        }
      ],
      "review_text": "For SSCM: which part of visual encoder is updated during training? From Fig.2 (1), the teacher part is frozen, but from Eq. (2), the parameters of teacher is updated. It is not clear.",
      "category": "figure-text",
      "description": "The equation shows updating of the teacher parameters, yet the figure shows the teacher weights are frozen",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "In SSCM, the masking of data propels the learning of nuanced features, while the self-supervised methodology amplifies the model\u2019s robustness in a teacher-student mutual learning method. The teacher network \ud835\udcaf(\u22c5) is frozen during training and is updated via an exponential moving average (EMA Tarvainen & Valpola (2017)) predicated on the current model\u2019s parameters. This process is articulated as follows:\n\n$$\n\\theta_t^{(t+1)} = \\gamma \\theta_t^{(t)} + (1 - \\gamma) \\theta_s^{(t)},\n$$\n\nwhere $\\theta_t$ and $\\theta_s$ represent the parameters of the teacher and student model, respectively, at training step $t$, and $\\gamma$ is the decay term controlling the update momentum.\n",
          "correct": "UEE13WQlNU_4_2f87314c",
          "incorrect": [
            "UEE13WQlNU_1_image_figure1",
            "UEE13WQlNU_9_image_figure4",
            "UEE13WQlNU_9_image_figure3"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"teacher model update\",\"claim\":{\"source\":\"text\",\"statement\":\"updated via EMA\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"frozen during EMA update\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"teacher model update\",\"claim\":{\"source\":\"text\",\"statement\":\"frozen during training\"},\"evidence\":{\"source\":\"Figure\",\"statement\":\"update during Visual Anchors Updating Module\"}}",
            "{\"letter\":\"C\",\"attribute\":\"teacher model update\",\"claim\":{\"source\":\"expectation\",\"statement\":\"not direct modification\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"direct modification\"}}",
            "{\"letter\":\"B\",\"attribute\":\"methodology behavior\",\"claim\":{\"source\":\"text\",\"statement\":\"self-supervised\"},\"evidence\":{\"source\":\"Figure\",\"statement\":\"not self-supervised\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"teacher model update\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align teacher model update\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"model status\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"align teacher model status\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"EMA update\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align EMA update\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"B\",\"attribute\":\"methodology\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add self-supervised behavior\",\"reason\":\"missing\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 2"]
    }
  ],
  "U2ZtvonVQz": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "U2ZtvonVQz_9_d782e38f",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.47555556023257906,
            "width": 0.6636904761904762,
            "height": 0.11264367816091955
          }
        }
      ],
      "review_text": "Figure 7 captions are wrong: none of figures mention FEM, but caption says (a) is FEM. From context, it is expected to be (a)$L_0$, (b)$\\mu_1$, (c) $\\mu_2$.",
      "category": "figure-caption",
      "description": "The caption does not match the figure",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"sub-plot captions\",\"claim\":{\"source\":\"caption\",\"statement\":\"match titles\"},\"evidence\":{\"source\":\"figure_7\",\"statement\":\"don't match titles\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"caption\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match figure\"},\"evidence\":{\"source\":\"figure_7\",\"statement\":\"doesn't match figure\"}}",
            "{\"letter\":\"D\",\"attribute\":\"panel labels\",\"claim\":{\"source\":\"caption\",\"statement\":\"correct\"},\"evidence\":{\"source\":\"figure_7\",\"statement\":\"incorrect\"}}",
            "{\"letter\":\"B\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match data\"},\"evidence\":{\"source\":\"figure_7\",\"statement\":\"doesn't match data\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"captions\",\"target\":\"figure_7a\",\"other_involved\":\"figure_7b, figure_7c\",\"action\":\"modify\",\"edit_statement\":\"match sub-plots titles\",\"reason\":\"not matching\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"mu plots\",\"target\":\"figure_7\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"mu plots\",\"reason\":\"lacking\"}",
            "{\"letter\":\"D\",\"attribute\":\"panel labels\",\"target\":\"figure_7b\",\"other_involved\":\"figure_7c, caption\",\"action\":\"replace\",\"edit_statement\":\"PINNs with DC-PINNs\",\"reason\":\"contradicting\"}",
            "{\"letter\":\"B\",\"attribute\":\"ground truth\",\"target\":\"figure_7a\",\"other_involved\":\"figure_7b, figure_7c, legend\",\"action\":\"add\",\"edit_statement\":\"ground truth line\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 7"]
    }
  ],
  "Tnd3dZxyEv": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 17,
          "image_id": "Tnd3dZxyEv_17_3e981805",
          "bbox": {
            "x": 0.16636902945382254,
            "y": 0.10268203910739944,
            "width": 0.6696428571428571,
            "height": 0.8252873563218391
          }
        }
      ],
      "review_text": "Figure 4, 5, and 6: The reviewer mentions that most of the losses have not converged, in particular for the 'no KGI MLPs', which contradicts the interpretation of the experimental results presented in the paper.",
      "category": "figure-only",
      "description": "Most of the No-KGI cases have not yet converged in training",
      "confidence": 2,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"convergence\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be converged\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"not converged\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"convergence\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be converged\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"not converged\"}}",
            "{\"letter\":\"D\",\"attribute\":\"plot\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be different\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"identical\"}}",
            "{\"letter\":\"C\",\"attribute\":\"MSE\",\"claim\":{\"source\":\"expectation\",\"statement\":\"No KGI higher\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"No KGI higher\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"loss history plots\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"edit plots convergence\",\"reason\":\"not converged\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"loss history plots\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"edit plots convergence\",\"reason\":\"not converged\"}",
            "{\"letter\":\"D\",\"attribute\":\"plots\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"replace\",\"edit_statement\":\"replace copied plots\",\"reason\":\"identical\"}",
            "{\"letter\":\"C\",\"attribute\":\"final MSE values\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"edit values\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "TCiJvhH2fC": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "TCiJvhH2fC_8_03a32a27",
          "bbox": {
            "x": 0.16636902945382254,
            "y": 0.2878544248383621,
            "width": 0.6785714285714286,
            "height": 0.5471264367816092
          }
        },
        {
          "type": "text",
          "page": 9,
          "content": " Furthermore, based on the last two rows in Figure 5, we significantly outperform MPRNet by thoroughly eliminating the reflective flare with the fewest artifacts, which proves that some artifacts may occur during the reflective flare removal process and our PIP manages to depress such phenomenon",
          "line": 460
        }
      ],
      "review_text": "Lines 460-462: MPRNet is mentioned in the text, but Figure 5 appears to be missing MPRNet, creating a discrepancy between the textual and visual elements.",
      "category": "figure-text",
      "description": "MPRNet is not represented in the figure",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": " Furthermore, based on the last two rows in Figure 5, we significantly outperform MPRNet by thoroughly eliminating the reflective flare with the fewest artifacts, which proves that some artifacts may occur during the reflective flare removal process and our PIP manages to depress such phenomenon",
          "correct": "TCiJvhH2fC_8_03a32a27",
          "incorrect": [
            "TCiJvhH2fC_6_image_figure3",
            "TCiJvhH2fC_6_image_figure4",
            "TCiJvhH2fC_4_image_figure2"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"comparison\",\"claim\":{\"source\":\"text\",\"statement\":\"based on Figure 5\"},\"evidence\":{\"source\":\"Figure 5\",\"statement\":\"MPRNet not displayed\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"caption\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should discuss all models\"},\"evidence\":{\"source\":\"Figure 5 caption\",\"statement\":\"only discusses FF-Former\"}}",
            "{\"letter\":\"B\",\"attribute\":\"flare elimination\",\"claim\":{\"source\":\"text\",\"statement\":\"eliminates flare\"},\"evidence\":{\"source\":\"Figure 5\",\"statement\":\"flare present\"}}",
            "{\"letter\":\"D\",\"attribute\":\"bounding boxes\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 5\",\"statement\":\"inconsistent bounding boxes\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"MPRNet inclusion\",\"target\":\"figure_5\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"MPRNet results\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"caption detail\",\"target\":\"caption_figure_5\",\"other_involved\":\"figure_5\",\"action\":\"modify\",\"edit_statement\":\"include Unet Uformer\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"B\",\"attribute\":\"flare elimination claim\",\"target\":\"text\",\"other_involved\":\"figure_5\",\"action\":\"modify\",\"edit_statement\":\"modify elimination claim\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"bounding boxes\",\"target\":\"figure_5\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align consistency\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 5"]
    }
  ],
  "SjMtxqdQ73": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "SjMtxqdQ73_5_49aa2baa",
          "bbox": {
            "x": 0.362797600882394,
            "y": 0.3091187926544541,
            "width": 0.4732142857142857,
            "height": 0.4252873563218391
          }
        }
      ],
      "review_text": "Minor points: In Fig. 3, both \u2018PEPTID\u2019 and \u2018PEITED\u2019 are misspelled at the top of the figure.",
      "category": "figure-only",
      "description": "The word PEPTIDE is mispelled as PEPTID or PEITED",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be PEPTIDE\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"labeled PEPTID and PEITED\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be PEPTID\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"labeled PEPTIDE\"}}",
            "{\"letter\":\"B\",\"attribute\":\"word\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be Sequence\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"spelled Sequenec\"}}",
            "{\"letter\":\"C\",\"attribute\":\"model architecture\",\"claim\":{\"source\":\"expectation\",\"statement\":\"Decoder feeds from Encoder\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"Decoder feeds into Encoder\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"PEPTIDE\",\"target\":\"figure_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"correct spelling\",\"reason\":\"inconsistent spelling\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"Target Peptide Sequence\",\"target\":\"figure_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"correct labelling\",\"reason\":\"incorrect labelling\"}",
            "{\"letter\":\"B\",\"attribute\":\"Sequence\",\"target\":\"figure_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"correct spelling\",\"reason\":\"misspelled as Sequenec\"}",
            "{\"letter\":\"C\",\"attribute\":\"Decoder\",\"target\":\"figure_3\",\"other_involved\":\"Encoder\",\"action\":\"reposition\",\"edit_statement\":\"reverse flow\",\"reason\":\"incorrect flow\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "SOVwGa0H2c": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "SOVwGa0H2c_3_5979cc90",
          "bbox": {
            "x": 0.538392838977632,
            "y": 0.28268201104525864,
            "width": 0.2946428571428571,
            "height": 0.15172413793103448
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "As shown in Table 1, the Zoomed Crop method significantly outperformed the others, achieving\nan accuracy of 0.76 with a token usage of 270. In comparison, the Unaltered Input method, de-\nspite processing the entire image, only achieved an accuracy of 0.64 while consuming 955 tokens.\nSimilarly, the Image Crop method, although reducing the token count to 270, did not yield any\nimprovement in accuracy compared to the unprocessed input.",
          "line": 144
        }
      ],
      "review_text": "Table 1: The accuracy values mentioned in L144-147 do not match the data presented in Table 1. Specifically, Zoomed Crop and Unaltered Input are argued to achieve 0.76 and 0.64 accuracy respectively, but in Table 1, the reported numbers are 0.64 and 0.57 respectively.",
      "category": "table-text",
      "description": "The performance data claimed in the text and in the table do not match",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "As shown in Table 1, the Zoomed Crop method significantly outperformed the others, achieving\nan accuracy of 0.76 with a token usage of 270. In comparison, the Unaltered Input method, de-\nspite processing the entire image, only achieved an accuracy of 0.64 while consuming 955 tokens.\nSimilarly, the Image Crop method, although reducing the token count to 270, did not yield any\nimprovement in accuracy compared to the unprocessed input.",
          "correct": "SOVwGa0H2c_3_5979cc90",
          "incorrect": [
            "SOVwGa0H2c_8_table_table2",
            "SOVwGa0H2c_8_table_table3",
            "SOVwGa0H2c_9_table_table5"
          ],
          "letters": ["B", "C", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"performance\",\"claim\":{\"source\":\"text\",\"statement\":\"Zoomed Crop performance\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"different accuracy\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"performance\",\"claim\":{\"source\":\"text\",\"statement\":\"Unaltered Input performance\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"different accuracy\"}}",
            "{\"letter\":\"B\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"text\",\"statement\":\"Image Crop improved accuracy\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"did not improve\"}}",
            "{\"letter\":\"D\",\"attribute\":\"prompt token count\",\"claim\":{\"source\":\"text\",\"statement\":\"Zoomed Crop count\"},\"evidence\":{\"source\":\"table\",\"statement\":\"different count\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"performance\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"match accuracy\",\"reason\":\"does not match\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"performance\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"match accuracy\",\"reason\":\"does not match\"}",
            "{\"letter\":\"B\",\"attribute\":\"accuracy\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align with table\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"prompt token count\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align with table\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "SM1guXel3E": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "SM1guXel3E_8_75b8435e",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.2992337281676545,
            "width": 0.6696428571428571,
            "height": 0.1149425287356322
          }
        }
      ],
      "review_text": "Table 1 and 5: The overall ranking is inconsistent, e.g., DeiT.",
      "category": "table-only",
      "description": "The table is not sorted according to overall ranking",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"order\",\"claim\":{\"source\":\"expectation\",\"statement\":\"ordered by Overall\"},\"evidence\":{\"source\":\"Table 5\",\"statement\":\"not ordered by Overall\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"consistency\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent\"},\"evidence\":{\"source\":\"Table 5\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Overall ranking\",\"claim\":{\"source\":\"expectation\",\"statement\":\"integer\"},\"evidence\":{\"source\":\"Table 5\",\"statement\":\"decimal\"}}",
            "{\"letter\":\"B\",\"attribute\":\"total column\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should exist\"},\"evidence\":{\"source\":\"Table 5\",\"statement\":\"does not exist\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"columns\",\"target\":\"table_5\",\"other_involved\":\"Overall ranking\",\"action\":\"reposition\",\"edit_statement\":\"order by ranking\",\"reason\":\"disordered\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"ranking\",\"target\":\"table_5\",\"other_involved\":\"Performance row, Applicability row, Overall row\",\"action\":\"modify\",\"edit_statement\":\"numeric consistency\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"Overall rankings\",\"target\":\"table_5\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"integer display\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"column\",\"target\":\"table_5\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"total/average column\",\"reason\":\"missing\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 5"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "SM1guXel3E_8_f0cadd53",
          "bbox": {
            "x": 0.17529760088239396,
            "y": 0.09164751425556752,
            "width": 0.6636904761904762,
            "height": 0.20919540229885059
          }
        }
      ],
      "review_text": "Figure 4: The legend is shared with other figures, making it hard to parse and understand the information conveyed.",
      "category": "figure-only",
      "description": "The caption looks like datapoints themselves and obscure the plot",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"numerical labels\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not look like data points\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"look like data points\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"metric\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should align with caption\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"not aligned\"}}",
            "{\"letter\":\"B\",\"attribute\":\"plot title\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match plot dimension\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"2D\"}}",
            "{\"letter\":\"D\",\"attribute\":\"font size\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"subplot\",\"statement\":\"inconsistent\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"numerical labels\",\"target\":\"GPU memory\",\"other_involved\":\"data points\",\"action\":\"reposition\",\"edit_statement\":\"adjust position\",\"reason\":\"misleading\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"metric\",\"target\":\"total training hours\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"misalignment\"}",
            "{\"letter\":\"B\",\"attribute\":\"variables\",\"target\":\"plot titles\",\"other_involved\":\"plots\",\"action\":\"modify\",\"edit_statement\":\"update dimension\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"font size\",\"target\":\"axis labels\",\"other_involved\":\"subplots\",\"action\":\"modify\",\"edit_statement\":\"standardize size\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "RrWAtQNGAg": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "RrWAtQNGAg_6_a5d1bfae",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.0879693393049569,
            "width": 0.6636904761904762,
            "height": 0.30344827586206896
          }
        }
      ],
      "review_text": "Figure 2: The correct topological orderings for the first example should be [a.py, b.py, c.py, d.py] or [a.py, b.py, d.py, c.py]; and for the second example, either [a.py, b.py, c.py, d.py, f.py], [b.py, a.py, c.py, d.py, f.py] are correct, which contradicts the ordering shown in the figure.",
      "category": "figure-only",
      "description": "The green path in case 2 should be b,c,d,f",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"green dotted path in Case 2\",\"claim\":{\"source\":\"expectation\",\"statement\":\"match file sequence\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"sequence mismatch\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"red dotted path in Case 2\",\"claim\":{\"source\":\"figure_2\",\"statement\":\"match file sequence\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"sequence mismatch\"}}",
            "{\"letter\":\"A\",\"attribute\":\"red dotted path in Case 1\",\"claim\":{\"source\":\"figure_2\",\"statement\":\"match file sequence\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"sequence mismatch\"}}",
            "{\"letter\":\"C\",\"attribute\":\"green dotted path in Case 1\",\"claim\":{\"source\":\"figure_2\",\"statement\":\"match file sequence\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"sequence mismatch\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"green dotted path\",\"target\":\"figure_2 (Case 2)\",\"other_involved\":\"file sequence\",\"action\":\"modify\",\"edit_statement\":\"align sequence\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"red dotted path\",\"target\":\"figure_2 (Case 2)\",\"other_involved\":\"file sequence\",\"action\":\"modify\",\"edit_statement\":\"align path\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"red dotted path\",\"target\":\"figure_2 (Case 1)\",\"other_involved\":\"file sequence\",\"action\":\"modify\",\"edit_statement\":\"align path\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"green dotted path\",\"target\":\"figure_2 (Case 1)\",\"other_involved\":\"file sequence\",\"action\":\"modify\",\"edit_statement\":\"align path\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "RrWAtQNGAg_8_5afa4603",
          "bbox": {
            "x": 0.1723214104062035,
            "y": 0.09291188908719468,
            "width": 0.6636904761904762,
            "height": 0.3218390804597701
          }
        }
      ],
      "review_text": "Table 1: The Chain-Instruct model is only compared to the pretrained DeepSeek-Coder model, but not to a stronger DeepSeek-Coder-Instruct, which is a contradiction if the goal is to show the superiority of the Chain-Instruct model.",
      "category": "table-only",
      "description": "The Chain-Instruct model performance is compared to DeepSeek-Coder, where the comparison with DeepSeek-Coder-Instruct would be more appropriate",
      "confidence": 1,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"tuning\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not same\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"Model Weight\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be omitted\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not omitted\"}}",
            "{\"letter\":\"C\",\"attribute\":\"Params\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"do not match\"}}",
            "{\"letter\":\"B\",\"attribute\":\"Base Model\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be listed\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not listed\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"models\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align instruction tuning\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"Model Weight column\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"remove\",\"edit_statement\":\"remove redundant\",\"reason\":\"redundant\"}",
            "{\"letter\":\"C\",\"attribute\":\"Params\",\"target\":\"table_1\",\"other_involved\":\"DeepseekCoder models\",\"action\":\"modify\",\"edit_statement\":\"match parameters\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"B\",\"attribute\":\"Base Model\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"Base Model info\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "RG806nMtQr": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "RG806nMtQr_6_12829608",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.10934862158764369,
            "width": 0.6755952380952381,
            "height": 0.1839080459770115
          }
        }
      ],
      "review_text": "Table 1: It seems as if the first row of the datasets & of the models is copied.",
      "category": "table-only",
      "description": "The rows of CIFAR-10 and ResNet have the exact same result",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"be different\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"same values\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"baseline\",\"claim\":{\"source\":\"expectation\",\"statement\":\"be consistent\"},\"evidence\":{\"source\":\"Clean accuracy\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"B\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"show drop\"},\"evidence\":{\"source\":\"Clean accuracy\",\"statement\":\"no drop\"}}",
            "{\"letter\":\"A\",\"attribute\":\"performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"show improvements\"},\"evidence\":{\"source\":\"proposed methods\",\"statement\":\"show drop\"}}"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance values\",\"target\":\"table_1\",\"other_involved\":\"ResNet\",\"action\":\"modify\",\"edit_statement\":\"align CIFAR-10\",\"reason\":\"same\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"clean accuracy\",\"target\":\"table_1\",\"other_involved\":\"CIFAR-10, TinyImage\",\"action\":\"modify\",\"edit_statement\":\"align baseline values\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"clean accuracy\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"remove\",\"edit_statement\":\"not drop\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"performance\",\"target\":\"table_1\",\"other_involved\":\"baseline\",\"action\":\"modify\",\"edit_statement\":\"show improvement\",\"reason\":\"expected\"}"
          ],
          "letters": ["C", "D", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "R8t9Q3jmCQ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "R8t9Q3jmCQ_9_08354664",
          "bbox": {
            "x": 0.16934521993001303,
            "y": 0.7755555602325791,
            "width": 0.6696428571428571,
            "height": 0.15172413793103448
          }
        }
      ],
      "review_text": "Figure 10: The boot of 'w/o Color Adapter' is red, while the text mentions it should be black, indicating a contradiction in the visual and textual elements.",
      "category": "figure-only",
      "description": "In the w/o Color Adapter, the left foot is in front, whereas in the other images, the right foot is in front. This raises questions about the capabilities of the color adapter",
      "confidence": 3,
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"foot position\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_10\",\"statement\":\"inconsistent\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"foot position\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_10\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"C\",\"attribute\":\"foot position\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_10\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"A\",\"attribute\":\"foot position\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_10\",\"statement\":\"inconsistent\"}}"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"model foot position\",\"target\":\"figure_10_wo_Color_Adapter\",\"other_involved\":\"figure_10_LQ,figure_10_HQ,figure_10_w_Color_Adapter\",\"action\":\"modify\",\"edit_statement\":\"update foot position\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"model foot position\",\"target\":\"figure_10_LQ\",\"other_involved\":\"figure_10_HQ\",\"action\":\"modify\",\"edit_statement\":\"update foot position\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"model foot position\",\"target\":\"figure_10\",\"other_involved\":\"figure_10_HQ\",\"action\":\"modify\",\"edit_statement\":\"update foot position\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"model foot position\",\"target\":\"figure_10\",\"other_involved\":\"figure_10\",\"action\":\"modify\",\"edit_statement\":\"update foot position\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "B", "C", "A"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 10"]
    }
  ],
  "QtSw71HJ6M": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "QtSw71HJ6M_7_3549790c",
          "bbox": {
            "x": 0.16692796235300522,
            "y": 0.3875280613854307,
            "width": 0.3292682926829268,
            "height": 0.25841184387617766
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "Notably, both models display a peak at negative Qdifference values, with a long tail extending toward\npositive values. However, our model exhibits a more concentrated peak near Qdifference = 0, while\nCQL shows more spread in the positive direction, indicating more frequent overestimations.",
          "line": 371
        }
      ],
      "review_text": "Figure 2: The paper claims that CQL shows more frequent overestimations, but in Figure 2, it is actually the proposed method (normal_ours) that exhibits more instances of high Q_difference = hat(Q)(s, a) - Q*(s, a), suggesting that the proposed method may also suffer from overestimation issues, contrary to the claimed advantage.",
      "category": "figure-text",
      "description": "The text states that CQL shows more frequent overestimations, yet the Q_difference of the proposed method is more positive than CQL.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Notably, both models display a peak at negative Qdifference values, with a long tail extending toward\npositive values. However, our model exhibits a more concentrated peak near Qdifference = 0, while\nCQL shows more spread in the positive direction, indicating more frequent overestimations.",
          "correct": "QtSw71HJ6M_7_3549790c",
          "incorrect": [
            "QtSw71HJ6M_4_image_figure1",
            "QtSw71HJ6M_14_image_figure4",
            "QtSw71HJ6M_15_image_figure5"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"Q_difference distribution\",\"claim\":{\"source\":\"expectation\",\"statement\":\"CQL overestimates more\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"normal_ours overestimates more\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"peak location\",\"claim\":{\"source\":\"text\",\"statement\":\"peak near Qdifference = 0\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"peak is negative\"}}",
            "{\"letter\":\"C\",\"attribute\":\"Q_difference concentration\",\"claim\":{\"source\":\"text\",\"statement\":\"peak at negative values\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"peak at positive values\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Q_difference tail\",\"claim\":{\"source\":\"text\",\"statement\":\"long tail to positive values\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"no data at positive values\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"spread in Q_difference\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"align spread description\",\"reason\":\"contradictory\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"peak concentration\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"align peak position\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"C\",\"attribute\":\"Q_difference concentrations\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align peak values\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"D\",\"attribute\":\"long tail\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"align data presence\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "QmJoF47DIR": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "QmJoF47DIR_4_35e8ed9a",
          "bbox": {
            "x": 0.16867012263175085,
            "y": 0.10192912167206428,
            "width": 0.6637630662020906,
            "height": 0.34454912516823694
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "An MLP decoder then maps the features sampled from the interpolated feature line\ninto an opacity offset, which is added to canonical opacity. Finally, the aforementioned gaussian\nattributes are combined to render the image, which is compared with the ground truth.",
          "line": 155
        }
      ],
      "review_text": "Figure 2: The direction of the arrow pointing toward the opacity offset is wrong based on the method description, which contradicts the visual representation in the figure.",
      "category": "figure-text",
      "description": "The arrow from Opacity offset to 3DGS rasterizer should be reversed.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "An MLP decoder then maps the features sampled from the interpolated feature line\ninto an opacity offset, which is added to canonical opacity. Finally, the aforementioned gaussian\nattributes are combined to render the image, which is compared with the ground truth.",
          "correct": "QmJoF47DIR_4_35e8ed9a",
          "incorrect": [
            "QmJoF47DIR_6_image_figure3",
            "QmJoF47DIR_7_image_figure4",
            "QmJoF47DIR_1_image_figure1"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"arrow direction\",\"claim\":{\"source\":\"expectation\",\"statement\":\"reversed\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"reversed\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"arrow\",\"claim\":{\"source\":\"figure_2\",\"statement\":\"missing\"},\"evidence\":{\"source\":\"expectation\",\"statement\":\"should exist\"}}",
            "{\"letter\":\"B\",\"attribute\":\"order\",\"claim\":{\"source\":\"expectation\",\"statement\":\"wrong order\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"wrong order\"}}",
            "{\"letter\":\"C\",\"attribute\":\"arrow direction\",\"claim\":{\"source\":\"expectation\",\"statement\":\"wrong direction\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"wrong direction\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"arrow\",\"target\":\"figure_2\",\"other_involved\":\"Opacity offset, 3DGS rasterizer\",\"action\":\"modify\",\"edit_statement\":\"reverse arrow direction\",\"reason\":\"reversed\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"arrow\",\"target\":\"figure_2\",\"other_involved\":\"Opacity offset, 3DGS rasterizer\",\"action\":\"add\",\"edit_statement\":\"add arrow\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"Opacity offset box\",\"target\":\"figure_2\",\"other_involved\":\"Canonical Appearance box\",\"action\":\"reposition\",\"edit_statement\":\"reposition Opacity offset box\",\"reason\":\"incorrect order\"}",
            "{\"letter\":\"C\",\"attribute\":\"arrow\",\"target\":\"figure_2\",\"other_involved\":\"Opacity offset, Dynamic Texture, 3DGS rasterizer\",\"action\":\"modify\",\"edit_statement\":\"change arrow target\",\"reason\":\"incorrect target\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "QjrC77Nyu6": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "QjrC77Nyu6_7_68b75b41",
          "bbox": {
            "x": 0.1704122829104965,
            "y": 0.7009869828358956,
            "width": 0.6637630662020906,
            "height": 0.22745625841184391
          }
        }
      ],
      "review_text": "Table 1: Why do some metrics report F1 scores while others report AUROC? This inconsistency in the reported metrics needs clarification.",
      "category": "table-only",
      "description": "The table shows F1 metric for one method and AUROC for the other two.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"secondary metric\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not consistent\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"error margin\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not consistent\"}}",
            "{\"letter\":\"C\",\"attribute\":\"highlighting\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not consistent\"}}",
            "{\"letter\":\"B\",\"attribute\":\"highlighting\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not consistent\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"secondary metric type\",\"target\":\"table_1\",\"other_involved\":\"MIT-AFIB, LT-AF, SVT\",\"action\":\"modify\",\"edit_statement\":\"align\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"Accuracy values\",\"target\":\"table_1\",\"other_involved\":\"LT-AF, SVT\",\"action\":\"add\",\"edit_statement\":\"error margins\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"bolded or underlined values\",\"target\":\"table_1\",\"other_involved\":\"downstream tasks\",\"action\":\"add\",\"edit_statement\":\"values\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"bold and underlined values\",\"target\":\"table_1\",\"other_involved\":\"F1 for MIT-AFIB\",\"action\":\"modify\",\"edit_statement\":\"remove contradiction\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "QfGc9txfGO": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "QfGc9txfGO_7_cea309b9",
          "bbox": {
            "x": 0.17315175097276264,
            "y": 0.0980445218373494,
            "width": 0.6595330739299611,
            "height": 0.47740963855421686
          }
        },
        {
          "type": "image",
          "page": 9,
          "image_id": "QfGc9txfGO_9_9392fd6e",
          "bbox": {
            "x": 0.1704122829104965,
            "y": 0.2701884253028264,
            "width": 0.6620209059233448,
            "height": 0.15074024226110364
          }
        }
      ],
      "review_text": "Table 3: The reported result for the methods with Sup-21K is 47.47, but changes to 47.37 in Table 5, 6, and 7.",
      "category": "table-table",
      "description": "Table 3 shows Sup-21K performance on Split Aircrafts as 47.47, but in Table 6 it is 47.37",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "QfGc9txfGO_7_cea309b9",
          "correct": "QfGc9txfGO_9_9392fd6e",
          "incorrect": [
            "QfGc9txfGO_9_table_table7",
            "QfGc9txfGO_8_table_table5",
            "QfGc9txfGO_7_table_table4"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"performance\",\"claim\":{\"source\":\"table_3\",\"statement\":\"47.47\"},\"evidence\":{\"source\":\"table_6\",\"statement\":\"47.37\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"performance\",\"claim\":{\"source\":\"table_4\",\"statement\":\"73.90\"},\"evidence\":{\"source\":\"table_6\",\"statement\":\"different dataset\"}}",
            "{\"letter\":\"C\",\"attribute\":\"configuration\",\"claim\":{\"source\":\"table_4\",\"statement\":\"present\"},\"evidence\":{\"source\":\"table_6\",\"statement\":\"absent\"}}",
            "{\"letter\":\"D\",\"attribute\":\"scalability\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent performance\"},\"evidence\":{\"source\":\"table_4\",\"statement\":\"inconsistent performance\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"performance\",\"target\":\"table_3\",\"other_involved\":\"table_6\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"inconsistent value\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"HiDe-Prompt performance\",\"target\":\"table_4\",\"other_involved\":\"table_6\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"inconsistent value\"}",
            "{\"letter\":\"C\",\"attribute\":\"'Ours' method performance\",\"target\":\"table_4\",\"other_involved\":\"table_6\",\"action\":\"add\",\"edit_statement\":\"add configuration\",\"reason\":\"missing configuration\"}",
            "{\"letter\":\"D\",\"attribute\":\"L2P performance\",\"target\":\"table_6\",\"other_involved\":\"table_4\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"scalability issue\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3", "Table 6"]
    }
  ],
  "QYgtZRTv3e": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "QYgtZRTv3e_4_6f137405",
          "bbox": {
            "x": 0.16692796235300522,
            "y": 0.09860923633601112,
            "width": 0.6655052264808361,
            "height": 0.35262449528936746
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "To ensure a fair comparison, all methods use the ImageNet-21K pre-\ntrained VIT-B-16 (Dosovitskiy et al., 2020) as the backbone network. We optimise our model with a\nlearning rate of 0.001 and set the number of epochs to 20 using Adam (Kingma & Ba, 2014). We set\nthe pool size to 200 for all datasets, with the exception of CIFAR, which we set to 100. We set the\nsecond-level prompt M length to 4 and the first-level prompt m length for CLIP to 16. ",
          "line": 340
        }
      ],
      "review_text": "3) It is confusing that, in the experiments L340 it says using the Pre-Trained model on ImageNet-21K but on the Figure2 are all using CLIP models. Also, the performance results reported in Table 1 differ significantly from those in the original papers, such as CODA-Prompt, making it difficult to interpret the findings accurately.",
      "category": "figure-text",
      "description": "The text states that the author's method uses pre-trained VIT-B-16, but the Figure shows CLIP.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "To ensure a fair comparison, all methods use the ImageNet-21K pre-\ntrained VIT-B-16 (Dosovitskiy et al., 2020) as the backbone network. We optimise our model with a\nlearning rate of 0.001 and set the number of epochs to 20 using Adam (Kingma & Ba, 2014). We set\nthe pool size to 200 for all datasets, with the exception of CIFAR, which we set to 100. We set the\nsecond-level prompt M length to 4 and the first-level prompt m length for CLIP to 16. ",
          "correct": "QYgtZRTv3e_4_6f137405",
          "incorrect": [
            "QYgtZRTv3e_1_image_figure1",
            "QYgtZRTv3e_7_image_figure3",
            "QYgtZRTv3e_13_image_figure4"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"backbone network\",\"claim\":{\"source\":\"text\",\"statement\":\"VIT-B-16\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"CLIP Encoders\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Figure 2\",\"claim\":{\"source\":\"expectation\",\"statement\":\"described\"},\"evidence\":{\"source\":\"text\",\"statement\":\"not mentioned\"}}",
            "{\"letter\":\"C\",\"attribute\":\"prompt parameters\",\"claim\":{\"source\":\"expectation\",\"statement\":\"frozen\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"learnable\"}}",
            "{\"letter\":\"A\",\"attribute\":\"Figure\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent\"},\"evidence\":{\"source\":\"Figure and caption\",\"statement\":\"swapped\"}}"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"pre-trained backbone network\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"network type\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Multi-Head Self Attention Layers\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"add\",\"edit_statement\":\"module description\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"Learnable Parameters\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"parameters status\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"A\",\"attribute\":\"parts\",\"target\":\"figure_2\",\"other_involved\":\"caption\",\"action\":\"swap\",\"edit_statement\":\"figure parts\",\"reason\":\"swapped\"}"
          ],
          "letters": ["D", "B", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "QO4bF6MHza": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 22,
          "image_id": "QO4bF6MHza_22_7e55be21",
          "bbox": {
            "x": 0.16867012263175085,
            "y": 0.12608795371382908,
            "width": 0.6689895470383275,
            "height": 0.5531628532974429
          }
        }
      ],
      "review_text": "Line 1065: Figure 15: Examples of data for the Single-Step Single-Document (MSSD) task.\\nLine 1121: Figure 16: Examples of data for the Single-Step Single-Document (SSMD) task.\\nLine 1171: Figure 17: Examples of data for the Single-Step Single-Document (MSMD) task.\\nThe labels for Figures 16 and 17 seem to be inconsistent with the text description of the tasks.",
      "category": "figure-caption",
      "description": "The caption describes a 'Single-Step Single-Document' Task, but the figure shows a Multi-Step Multi-Document task.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"task type\",\"claim\":{\"source\":\"caption\",\"statement\":\"Single Step\"},\"evidence\":{\"source\":\"figure_17\",\"statement\":\"Multi Step\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"title\",\"claim\":{\"source\":\"caption\",\"statement\":\"Single Document\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"MSMD\"}}",
            "{\"letter\":\"B\",\"attribute\":\"numerical answers\",\"claim\":{\"source\":\"caption\",\"statement\":\"correct\"},\"evidence\":{\"source\":\"relevant documents\",\"statement\":\"incorrect\"}}",
            "{\"letter\":\"C\",\"attribute\":\"relevant documents\",\"claim\":{\"source\":\"expectation\",\"statement\":\"relevant to question\"},\"evidence\":{\"source\":\"relevant documents\",\"statement\":\"irrelevant\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"task type\",\"target\":\"caption_figure_17\",\"other_involved\":\"figure_17\",\"action\":\"modify\",\"edit_statement\":\"update description\",\"reason\":\"misleading\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"title\",\"target\":\"figure_17\",\"other_involved\":\"questions\",\"action\":\"modify\",\"edit_statement\":\"update task type\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"B\",\"attribute\":\"numerical answers\",\"target\":\"figure_17\",\"other_involved\":\"relevant_documents\",\"action\":\"modify\",\"edit_statement\":\"correct calculations\",\"reason\":\"inaccurate\"}",
            "{\"letter\":\"C\",\"attribute\":\"information\",\"target\":\"relevant_document_sections\",\"other_involved\":\"questions\",\"action\":\"remove\",\"edit_statement\":\"irrelevant data\",\"reason\":\"flawed\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 17"]
    }
  ],
  "Pnr8XNWcY0": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "Pnr8XNWcY0_6_bb5e958e",
          "bbox": {
            "x": 0.16518580207425956,
            "y": 0.10980257468140142,
            "width": 0.6707317073170731,
            "height": 0.39030955585464333
          }
        }
      ],
      "review_text": "Table 3: Inconsistent representation of units. Some tasks show accuracy without the percentage sign (e.g., '0.87' instead of '87%').",
      "category": "table-only",
      "description": "Some of the accuracies are reported as percentages, some are missing the % symbol.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"percentage representation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"include '%' symbol\"},\"evidence\":{\"source\":\"table_3\",\"statement\":\"missing '%' symbol\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"percentage symbol\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent symbol usage\"},\"evidence\":{\"source\":\"table_3\",\"statement\":\"consistent '%' symbol\"}}",
            "{\"letter\":\"A\",\"attribute\":\"decimal points\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent decimal points\"},\"evidence\":{\"source\":\"table_3\",\"statement\":\"inconsistent decimal points\"}}",
            "{\"letter\":\"B\",\"attribute\":\"L5 level\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent representation\"},\"evidence\":{\"source\":\"table_3\",\"statement\":\"percentages\"}}"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"% symbol\",\"target\":\"Table 3\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"add symbol\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"percentage values\",\"target\":\"Table 3\",\"other_involved\":\"explanation below table\",\"action\":\"modify\",\"edit_statement\":\"align\",\"reason\":\"claims decimal\"}",
            "{\"letter\":\"A\",\"attribute\":\"decimal points\",\"target\":\"Table 3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"L5 values\",\"target\":\"Table 3\",\"other_involved\":\"footnote\",\"action\":\"modify\",\"edit_statement\":\"align\",\"reason\":\"contradicts\"}"
          ],
          "letters": ["D", "C", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3"]
    }
  ],
  "PdDm14eXO4": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "PdDm14eXO4_4_3dec2bd8",
          "bbox": {
            "x": 0.16518580207425956,
            "y": 0.09625392005068137,
            "width": 0.6689895470383275,
            "height": 0.32974427994616423
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "n contrast, the audio-visual corresponding samples exhibit a left-skewed\ndistribution with a higher concentration of similar instances. When the similarity of samples exceeds\nthe threshold \u03bc + 3\u03c3 (0.2564) of the audio-visual non-corresponding distribution Nnon-corresponding,\nonly 0.135% of the samples remain; thus, exceeding this threshold can be considered indicative of\naudio-visual correspondence. Notably, only 35% of the randomly selected wild data samples exhibit\nsimilarities below the \u03bc + 3\u03c3 (0.2564) threshold of the distribution Nnon\u2212corresponding .",
          "line": 203
        }
      ],
      "review_text": "Point 10 and Figure 1: Inconsistent ratios for AudioSet (65% vs 35%)",
      "category": "figure-text",
      "description": "The text states that only 35% samples exhibit similarities below the threshold, but the caption of the figure lists 65%.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "n contrast, the audio-visual corresponding samples exhibit a left-skewed\ndistribution with a higher concentration of similar instances. When the similarity of samples exceeds\nthe threshold \u03bc + 3\u03c3 (0.2564) of the audio-visual non-corresponding distribution Nnon-corresponding,\nonly 0.135% of the samples remain; thus, exceeding this threshold can be considered indicative of\naudio-visual correspondence. Notably, only 35% of the randomly selected wild data samples exhibit\nsimilarities below the \u03bc + 3\u03c3 (0.2564) threshold of the distribution Nnon\u2212corresponding .",
          "correct": "PdDm14eXO4_4_3dec2bd8",
          "incorrect": [
            "PdDm14eXO4_4_image_figure2",
            "PdDm14eXO4_5_image_figure3",
            "PdDm14eXO4_6_image_figure4"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"similarities below threshold\",\"claim\":{\"source\":\"caption\",\"statement\":\"65%\"},\"evidence\":{\"source\":\"text\",\"statement\":\"35%\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"similarities below threshold\",\"claim\":{\"source\":\"caption\",\"statement\":\"18%\"},\"evidence\":{\"source\":\"text\",\"statement\":\"35%\"}}",
            "{\"letter\":\"A\",\"attribute\":\"cumulative non-corresponding data\",\"claim\":{\"source\":\"text\",\"statement\":\"0.135%\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"0.99%\"}}",
            "{\"letter\":\"B\",\"attribute\":\"\u03bc+3\u03c3 threshold\",\"claim\":{\"source\":\"caption\",\"statement\":\"0.2654\"},\"evidence\":{\"source\":\"text\",\"statement\":\"0.2564\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"randomly selected wild samples\",\"target\":\"figure_caption\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update percentage\",\"reason\":\"conflict\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"audio-visual non-corresponding samples\",\"target\":\"figure_caption\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update percentage\",\"reason\":\"conflict\"}",
            "{\"letter\":\"A\",\"attribute\":\"non-corresponding samples\",\"target\":\"text\",\"other_involved\":\"figure\",\"action\":\"modify\",\"edit_statement\":\"update percentage\",\"reason\":\"conflict\"}",
            "{\"letter\":\"B\",\"attribute\":\"\u03bc+3\u03c3 threshold\",\"target\":\"figure_caption\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"different\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "PN4f0hnI0U": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "PN4f0hnI0U_10_01158875",
          "bbox": {
            "x": 0.16867012263175085,
            "y": 0.18891884147880217,
            "width": 0.6672473867595818,
            "height": 0.10497981157469718
          }
        },
        {
          "type": "text",
          "page": 10,
          "content": "Again, we observe that across different configurations,\nsimilar average Dice and HD95 scores are obtained by increasing the number of blocks, indicating\nthat our method is insensitive to the number of transformer blocks. However, further increasing the\nnumber of blocks will increase the computational cost. Therefore, in our experiments, we set the\nnumber of blocks to 2 by default.",
          "line": 486
        }
      ],
      "review_text": "Table 2: Using one block appears to achieve the best performance when considering both Dice and HD95 metrics, but the authors chose to use 2 blocks instead.",
      "category": "table-text",
      "description": "The table shows that block size 1 achieves the best results considering all metrics, but the text says the authors chose block size 2 to keep computational costs low, even though block size 1 would be even better in terms of computational costs.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Again, we observe that across different configurations,\nsimilar average Dice and HD95 scores are obtained by increasing the number of blocks, indicating\nthat our method is insensitive to the number of transformer blocks. However, further increasing the\nnumber of blocks will increase the computational cost. Therefore, in our experiments, we set the\nnumber of blocks to 2 by default.",
          "correct": "PN4f0hnI0U_10_01158875",
          "incorrect": [
            "PN4f0hnI0U_9_table_table3",
            "PN4f0hnI0U_9_table_table4",
            "PN4f0hnI0U_8_table_table1"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"block size\",\"claim\":{\"source\":\"expectation\",\"statement\":\"block size 2 not optimal\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"block size 1 better or equal\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"block size\",\"claim\":{\"source\":\"expectation\",\"statement\":\"optimal performance\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"block size 2 not optimal\"}}",
            "{\"letter\":\"B\",\"attribute\":\"number of blocks\",\"claim\":{\"source\":\"text\",\"statement\":\"insensitive to number of blocks\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"degradation in performance\"}}",
            "{\"letter\":\"C\",\"attribute\":\"computational cost\",\"claim\":{\"source\":\"text\",\"statement\":\"increases with number of blocks\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"cost goes down\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"optimal performance block_size\",\"target\":\"text\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"choose best cost-performance block size\",\"reason\":\"not optimal block size chosen\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"optimal performance block_size\",\"target\":\"table_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"choose block 3\",\"reason\":\"block 2 chosen\"}",
            "{\"letter\":\"B\",\"attribute\":\"performance insensitive to block size\",\"target\":\"text\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"align text with table\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"computational_cost decreasing\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"show increase\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "OzwGZP8h2A": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "OzwGZP8h2A_4_ba512e8d",
          "bbox": {
            "x": 0.5571718647920296,
            "y": 0.48153879568682706,
            "width": 0.27177700348432055,
            "height": 0.14535666218034995
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "As illustrated in Figure 3, ab is a logical sharing\nnode that appears twice in the expressions and can be shared as a node in the final circuit.",
          "line": 200
        }
      ],
      "review_text": "Figure 3: The figure does not match its explanation in the text (line 200).",
      "category": "figure-text",
      "description": "The text description says Figure 3 shows ab as a logical sharing node that appears twice in the expressions, but Figure 3 does not show that or any references to 'ab'.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "As illustrated in Figure 3, ab is a logical sharing\nnode that appears twice in the expressions and can be shared as a node in the final circuit.",
          "correct": "OzwGZP8h2A_4_ba512e8d",
          "incorrect": [
            "OzwGZP8h2A_4_image_figure4",
            "OzwGZP8h2A_3_image_figure2",
            "OzwGZP8h2A_5_image_figure5"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"node 'ab'\",\"claim\":{\"source\":\"text\",\"statement\":\"exists\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"absent\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"total length\",\"claim\":{\"source\":\"expectation\",\"statement\":\"less than 10\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"equal to 10\"}}",
            "{\"letter\":\"D\",\"attribute\":\"node 'ab'\",\"claim\":{\"source\":\"text\",\"statement\":\"shared\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"not shared\"}}",
            "{\"letter\":\"C\",\"attribute\":\"shared node\",\"claim\":{\"source\":\"text\",\"statement\":\"'ab'\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"x1 and x4\"}}"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"logical sharing node 'ab'\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add node 'ab'\",\"reason\":\"omitted\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"total length\",\"target\":\"figure_3\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"reduce length\",\"reason\":\"sharing\"}",
            "{\"letter\":\"D\",\"attribute\":\"shared node 'ab'\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add 'ab' sharing\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"shared node\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"change shared node\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["B", "A", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "OuxdVB6g1F": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "OuxdVB6g1F_4_4840fb3c",
          "bbox": {
            "x": 0.14253771845056618,
            "y": 0.11123825627733851,
            "width": 0.7195121951219512,
            "height": 0.3122476446837147
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "In Section 4.2, we propose the Graph2Text module that can information loselessly transform the Graph-of-Text view to Text-of-Graph view.",
          "line": 170
        }
      ],
      "review_text": "Figure 2 caption: The caption mentions 'Graph-of-Text view' twice, which is inconsistent with the expected caption format.",
      "category": "figure-text",
      "description": "The text defines Graph2Text to transform Graph-of-Text into Text-of-Graph, but the caption of Figure 2 shows a different transformation.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "In Section 4.2, we propose the Graph2Text module that can information loselessly transform the Graph-of-Text view to Text-of-Graph view.",
          "correct": "OuxdVB6g1F_4_4840fb3c",
          "incorrect": [
            "OuxdVB6g1F_1_image_figure1",
            "OuxdVB6g1F_9_table_figure3",
            "OuxdVB6g1F_9_image_figure4"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"Graph2Text definition\",\"claim\":{\"source\":\"caption\",\"statement\":\"Graph-of-Text to Graph-of-Text\"},\"evidence\":{\"source\":\"text\",\"statement\":\"Graph-of-Text to Text-of-Graph\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Graph2Text name\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistently named\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"different name\"}}",
            "{\"letter\":\"C\",\"attribute\":\"Graph2Text input\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be graph view\"},\"evidence\":{\"source\":\"Figure 2(b)\",\"statement\":\"BFS Tree\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Graph2Text transformation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be lossless\"},\"evidence\":{\"source\":\"Figure 2(b)\",\"statement\":\"Preorder Traversal\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"transformation\",\"target\":\"caption_figure_2b\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update view transformation\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"method name\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align method name\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"input\",\"target\":\"figure_2b\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"clarify module input\",\"reason\":\"linear representation\"}",
            "{\"letter\":\"D\",\"attribute\":\"transformation type\",\"target\":\"figure_2b\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"explain lossless transformation\",\"reason\":\"not lossless\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "OIhON8zd8d": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "OIhON8zd8d_4_1d62a48d",
          "bbox": {
            "x": 0.16692796235300522,
            "y": 0.09122923983218371,
            "width": 0.6689895470383275,
            "height": 0.19246298788694485
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "We thus formulate the inversion as training an encoder\u2013decoder model. The encoder turns input point clouds into an ECT, whereas the decoder aims to reconstruct a point cloud from an ECT. ",
          "line": 160
        }
      ],
      "review_text": "Figure 1: The purpose of the 'encoder' does not match the text in L161.",
      "category": "figure-text",
      "description": "The role of the encoder is contradictory in the figure and the text.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "We thus formulate the inversion as training an encoder\u2013decoder model. The encoder turns input point clouds into an ECT, whereas the decoder aims to reconstruct a point cloud from an ECT. ",
          "correct": "OIhON8zd8d_4_1d62a48d",
          "incorrect": [
            "OIhON8zd8d_5_image_figure2",
            "OIhON8zd8d_5_image_figure3",
            "OIhON8zd8d_7_image_figure4"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"encoder output\",\"claim\":{\"source\":\"text\",\"statement\":\"outputs ECT\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"outputs reconstructed point cloud\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"input to Encoder\",\"claim\":{\"source\":\"caption\",\"statement\":\"ECT is computed first\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"initial point cloud is input\"}}",
            "{\"letter\":\"D\",\"attribute\":\"VAE function\",\"claim\":{\"source\":\"expectation\",\"statement\":\"VAE encodes point cloud\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"VAE handles abstract shapes\"}}",
            "{\"letter\":\"B\",\"attribute\":\"decoder component\",\"claim\":{\"source\":\"expectation\",\"statement\":\"decoder reconstructs point cloud\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"ECT is input to VAE\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"encoder output\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align output element\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"encoder input\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align input type\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"VAE input\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align input element\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"B\",\"attribute\":\"decoder input\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align input element\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "O2CG9B2k9Q": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "O2CG9B2k9Q_8_c2e7f4d9",
          "bbox": {
            "x": 0.16867012263175085,
            "y": 0.11175413889005721,
            "width": 0.6637630662020906,
            "height": 0.31359353970390313
          }
        },
        {
          "type": "image",
          "page": 8,
          "image_id": "O2CG9B2k9Q_8_7f725fab",
          "bbox": {
            "x": 0.16867012263175085,
            "y": 0.4462987558357167,
            "width": 0.6620209059233448,
            "height": 0.3310901749663526
          }
        }
      ],
      "review_text": "b) Inconsistency in X-axis scales: Figure 4 uses a linear scale, while Figure 5 uses a logarithmic one.",
      "category": "figure-figure",
      "description": "Figure 4 shows a linear scaling for D-FLD, but Figure 5 has a logarithmic scale for FLD",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first figure consistent with the content of the second figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first figure inconsistent with the content of the second figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "O2CG9B2k9Q_8_c2e7f4d9",
          "correct": "O2CG9B2k9Q_8_7f725fab",
          "incorrect": [
            "O2CG9B2k9Q_8_image_figure7",
            "O2CG9B2k9Q_8_image_figure6",
            "O2CG9B2k9Q_7_image_figure4"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent scale\"},\"evidence\":{\"source\":\"Figure 4 and Figure 5\",\"statement\":\"different scales\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"inconsistent scale\"},\"evidence\":{\"source\":\"Figure 4 and Figure 5\",\"statement\":\"different scales\"}}",
            "{\"letter\":\"A\",\"attribute\":\"scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"same scale\"},\"evidence\":{\"source\":\"figures\",\"statement\":\"different scales\"}}",
            "{\"letter\":\"C\",\"attribute\":\"scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent scale\"},\"evidence\":{\"source\":\"Figure 4 and Figure 5\",\"statement\":\"different scales for FID\"}}"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"scale\",\"target\":\"figure_5\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"logarithmic D-FLD\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"scale\",\"target\":\"figure_4\",\"other_involved\":\"figure_5\",\"action\":\"modify\",\"edit_statement\":\"logarithmic D-FLD\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"lines\",\"target\":\"figure_4\",\"other_involved\":\"figure_5\",\"action\":\"modify\",\"edit_statement\":\"align scale\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"scale\",\"target\":\"figure_5\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"logarithmic FID\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["B", "D", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4", "Figure 5"]
    }
  ],
  "NK09Bcvuxl": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "NK09Bcvuxl_6_4ebe11ce",
          "bbox": {
            "x": 0.4485866155725486,
            "y": 0.6081512117646432,
            "width": 0.3904593639575971,
            "height": 0.32103825136612024
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "At acquisition round t, suppose we have labeled set Lt\u22121 and unlabeled set Ut\u22121 as the results\nfrom the previous round t \u2212 1, and new sample xi \u2208 Ut\u22121 that is currently under consideration for\nacquisition, the goal of this section is to estimate the parameters of model ft|xi,yi that could has been\nobtained after training ft\u22121 on the combined dataset {Lt\u22121 \u222a xi}.",
          "line": 180
        }
      ],
      "review_text": "Page 6, Algorithm 1 description, line 5: 'randomly sample n_ivp from unlabeled data U_i,t' this is wrong from the description in page 4, 183 line. We should sample labeled data instead of unlabeled data to update the model parameter (see line 227) But U_i,t from the description is the unlabeled data",
      "category": "algorithm-only",
      "description": "The algorithm at step 5 states randomly sample n_ivp from unlabeled data U_i, but the text states that we should sample from labeled data L_i",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the algorithm that is consistent with a different part of the algorithm?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the algorithm that is inconsistent with a different part of the algorithm?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "At acquisition round t, suppose we have labeled set Lt\u22121 and unlabeled set Ut\u22121 as the results\nfrom the previous round t \u2212 1, and new sample xi \u2208 Ut\u22121 that is currently under consideration for\nacquisition, the goal of this section is to estimate the parameters of model ft|xi,yi that could has been\nobtained after training ft\u22121 on the combined dataset {Lt\u22121 \u222a xi}.",
          "correct": "NK09Bcvuxl_6_4ebe11ce",
          "incorrect": [
            "NK09Bcvuxl_6_interline-equation_equation3",
            "NK09Bcvuxl_6_interline-equation_equation16",
            "NK09Bcvuxl_6_interline-equation_equation8"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"sampling source\",\"claim\":{\"source\":\"text\",\"statement\":\"labeled set\"},\"evidence\":{\"source\":\"Algorithm 1\",\"statement\":\"unlabeled set\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"sampling source\",\"claim\":{\"source\":\"text\",\"statement\":\"L_t-1\"},\"evidence\":{\"source\":\"Algorithm 1\",\"statement\":\"U_t,i\"}}",
            "{\"letter\":\"C\",\"attribute\":\"input usage\",\"claim\":{\"source\":\"expectation\",\"statement\":\"use U_t-1\"},\"evidence\":{\"source\":\"Algorithm 1\",\"statement\":\"not used\"}}",
            "{\"letter\":\"B\",\"attribute\":\"training data\",\"claim\":{\"source\":\"expectation\",\"statement\":\"train on L_t-1 \u222a x_i\"},\"evidence\":{\"source\":\"algorithm\",\"statement\":\"not shown\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"data points\",\"target\":\"Algorithm_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update source\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"samples source\",\"target\":\"Algorithm_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update source\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"input U_t-1\",\"target\":\"Algorithm_1\",\"other_involved\":\"L_t-1 set\",\"action\":\"remove\",\"edit_statement\":\"redundant input\",\"reason\":\"not used\"}",
            "{\"letter\":\"B\",\"attribute\":\"training process\",\"target\":\"Algorithm_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"explicitly show\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Algorithm 1"]
    }
  ],
  "N5qFgohx9u": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 25,
          "image_id": "N5qFgohx9u_25_a17fa590",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.10487248988750855,
            "width": 0.6625441696113074,
            "height": 0.523224043715847
          }
        },
        {
          "type": "text",
          "page": 9,
          "content": "As seen in the attention heatmaps in Fig. 8 in the Appendix, vanilla attention almost\nnever assigns zero attention score to a token pair. In contrast, M\u00f6biusAttention gives most of the\npairs zero score and only a few a non-zero one",
          "line": 460
        }
      ],
      "review_text": "Figure 8: The argument 'Fig. 8 in the Appendix, vanilla attention almost never assigns zero attention score to a token pair. In contrast, M\u00f6biusAttention gives most of the pairs zero score and only a few a non-zero one' seems not supported by the results. In Figure 8, the average number of zero elements in Vanilla head (a~f) is 11.17, while M\u00f6bius head (g~l) is 9.67. More importantly, the attention matrix of M\u00f6biusAttention seems almost uniform.",
      "category": "figure-text",
      "description": "The text states the vanilla attention almost never assigns zero attention score, but M\u00f6biusAttention gives most of the pairs zero score is contradicted by the Figure, showing on average M\u00f6biusAttention gives less zero attention score.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "As seen in the attention heatmaps in Fig. 8 in the Appendix, vanilla attention almost\nnever assigns zero attention score to a token pair. In contrast, M\u00f6biusAttention gives most of the\npairs zero score and only a few a non-zero one",
          "correct": "N5qFgohx9u_25_a17fa590",
          "incorrect": [
            "N5qFgohx9u_23_image_figure7",
            "N5qFgohx9u_22_image_figure5",
            "N5qFgohx9u_22_image_figure6"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"average attention scores\",\"claim\":{\"source\":\"text\",\"statement\":\"M\u00f6biusAttention gives most pairs zero\"},\"evidence\":{\"source\":\"Figure 8\",\"statement\":\"fewer zero scores in M\u00f6biusAttention\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"vanilla attention scores\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't be predominantly zero\"},\"evidence\":{\"source\":\"Figure 8(m)\",\"statement\":\"predominantly zero\"}}",
            "{\"letter\":\"A\",\"attribute\":\"attention scores\",\"claim\":{\"source\":\"text\",\"statement\":\"predominantly zero\"},\"evidence\":{\"source\":\"Figure 8\",\"statement\":\"mix of zero and non-zero\"}}",
            "{\"letter\":\"D\",\"attribute\":\"attention scores\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't be mostly zero for vanilla\"},\"evidence\":{\"source\":\"Figure 8(m)\",\"statement\":\"mostly zero for vanilla\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"attention scores\",\"target\":\"figure_8n\",\"other_involved\":\"text, figure_8m\",\"action\":\"modify\",\"edit_statement\":\"show more zeros\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"attention scores\",\"target\":\"text\",\"other_involved\":\"figure_8m\",\"action\":\"modify\",\"edit_statement\":\"update description\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"attention heads scores\",\"target\":\"text\",\"other_involved\":\"figure_8a, figure_8k\",\"action\":\"modify\",\"edit_statement\":\"update scores\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"D\",\"attribute\":\"attention scores\",\"target\":\"text\",\"other_involved\":\"figure_8\",\"action\":\"modify\",\"edit_statement\":\"update description\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 8"]
    }
  ],
  "N18Z2MkMEa": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "N18Z2MkMEa_5_88ba7555",
          "bbox": {
            "x": 0.3284452728163648,
            "y": 0.20282332623591193,
            "width": 0.5053003533568904,
            "height": 0.05191256830601093
          }
        },
        {
          "type": "image",
          "page": 6,
          "image_id": "N18Z2MkMEa_6_b106f2c0",
          "bbox": {
            "x": 0.33197884172095843,
            "y": 0.5020036541047644,
            "width": 0.5035335689045937,
            "height": 0.05191256830601093
          }
        }
      ],
      "review_text": "Eq(2) and Eq(8): These equations are the same, but they are presented as different equations.",
      "category": "equation-equation",
      "description": "Equation (2) and (8) are identical, besides the fact they are denoted as different and appear in different parts of the text.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first equation consistent with the content of the second equation?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first equation inconsistent with the content of the second equation?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "N18Z2MkMEa_5_88ba7555",
          "correct": "N18Z2MkMEa_6_b106f2c0",
          "incorrect": [
            "N18Z2MkMEa_5_interline-equation_equation31.5",
            "N18Z2MkMEa_5_interline-equation_equation24",
            "N18Z2MkMEa_5_interline-equation_equation17"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"equation identity\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be different\"},\"evidence\":{\"source\":\"(2), (8)\",\"statement\":\"are identical\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"sign presence\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"(2), (8)\",\"statement\":\"are different\"}}",
            "{\"letter\":\"B\",\"attribute\":\"summation limits\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"(2), (8)\",\"statement\":\"are different\"}}",
            "{\"letter\":\"D\",\"attribute\":\"term presence\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"(2), (8)\",\"statement\":\"are different\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"equation content\",\"target\":\"equation_2\",\"other_involved\":\"equation_8\",\"action\":\"modify\",\"edit_statement\":\"distinguish content\",\"reason\":\"identical\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"negative sign\",\"target\":\"equation_2\",\"other_involved\":\"equation_8\",\"action\":\"remove\",\"edit_statement\":\"sign\",\"reason\":\"absent in (8)\"}",
            "{\"letter\":\"B\",\"attribute\":\"summation limits\",\"target\":\"equation_2\",\"other_involved\":\"equation_8\",\"action\":\"modify\",\"edit_statement\":\"align limits\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"additional term\",\"target\":\"equation_8\",\"other_involved\":\"equation_2\",\"action\":\"remove\",\"edit_statement\":\"term\",\"reason\":\"term not found in (2)\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["(2)", "(8)"]
    }
  ],
  "MsAglk31tQ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "MsAglk31tQ_10_340cf2f0",
          "bbox": {
            "x": 0.1659011032050574,
            "y": 0.09783702079064209,
            "width": 0.6749116607773851,
            "height": 0.21038251366120223
          }
        },
        {
          "type": "text",
          "page": 10,
          "content": "We consider the following domain-centric baselines. (Image) For image\ndata, we consider three segmentation methods (Kim et al., 2024). Patches (Dosovitskiy et al., 2021)\ndivides the image into grids where each cell is the same size. Quickshift (Grady, 2006) connects\nsimilar neighboring pixels into a common superpixel. Watershed (Levner & Zhang, 2007) simulates\nflooding on a topographic surface. CRAFT (Fel et al., 2023) generates concept attribution maps",
          "line": 477
        }
      ],
      "review_text": "Line 478: The paper mentions 'three segmentation methods' but lists four: 'Patches', 'Quickshift', 'Watershed', and 'CRAFT'. Additionally, Table 2 includes an extra method, 'SAM', which is not cited.",
      "category": "table-text",
      "description": "The text says three segmentation methods, lists four methods and the table shows SAM additionally, so five.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "We consider the following domain-centric baselines. (Image) For image\ndata, we consider three segmentation methods (Kim et al., 2024). Patches (Dosovitskiy et al., 2021)\ndivides the image into grids where each cell is the same size. Quickshift (Grady, 2006) connects\nsimilar neighboring pixels into a common superpixel. Watershed (Levner & Zhang, 2007) simulates\nflooding on a topographic surface. CRAFT (Fel et al., 2023) generates concept attribution maps",
          "correct": "MsAglk31tQ_10_340cf2f0",
          "incorrect": [
            "MsAglk31tQ_24_table_table4",
            "MsAglk31tQ_8_image_figure6",
            "MsAglk31tQ_7_image_figure5"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"segmentation methods\",\"claim\":{\"source\":\"expectation\",\"statement\":\"three methods\"},\"evidence\":{\"source\":\"text\",\"statement\":\"four methods\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"segmentation methods\",\"claim\":{\"source\":\"text\",\"statement\":\"three methods\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"two entries\"}}",
            "{\"letter\":\"D\",\"attribute\":\"segmentation methods\",\"claim\":{\"source\":\"expectation\",\"statement\":\"four methods\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"omits one method\"}}",
            "{\"letter\":\"B\",\"attribute\":\"segmentation methods\",\"claim\":{\"source\":\"text\",\"statement\":\"three methods\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"six methods\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"segmentation methods\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align count\",\"reason\":\"different numbers\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"segmentation methods\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"add entries\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"segmentation methods\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"harmonize methods\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"B\",\"attribute\":\"segmentation methods\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"reduce entries\",\"reason\":\"too many\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "MazxSMs6Hs": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "MazxSMs6Hs_7_0fd0114e",
          "bbox": {
            "x": 0.17296824101424468,
            "y": 0.43902554538080607,
            "width": 0.7226148409893992,
            "height": 0.28688524590163933
          }
        }
      ],
      "review_text": "Figures 2-4: The y-axis labels states U-WER, which is essentially a standard deviation metric measuring uncertainty. However, the captions states 'WER' which indicates model final performance. These two are different metrics, which is confusing.",
      "category": "figure-caption",
      "description": "The caption states WER as a performance measurement, but the y-axis shows U-WER, a different metric.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"metric\",\"claim\":{\"source\":\"caption\",\"statement\":\"WER Performance\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"Uncertainty WER\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"domain\",\"claim\":{\"source\":\"expectation\",\"statement\":\"accents are general\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"accents not defined\"}}",
            "{\"letter\":\"D\",\"attribute\":\"settings\",\"claim\":{\"source\":\"expectation\",\"statement\":\"settings match caption\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"settings not fully encompassed\"}}",
            "{\"letter\":\"C\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"legend categories relate to y-axis\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"y-axis not specified\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"label\",\"target\":\"figure_2\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"y-axis display WER\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"term\",\"target\":\"caption\",\"other_involved\":\"x-axis\",\"action\":\"modify\",\"edit_statement\":\"define general domain\",\"reason\":\"unclear\"}",
            "{\"letter\":\"D\",\"attribute\":\"scope\",\"target\":\"caption\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"reflect plot content\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"C\",\"attribute\":\"metric\",\"target\":\"y-axis\",\"other_involved\":\"legend\",\"action\":\"add\",\"edit_statement\":\"detail legend categories\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "MJWJoICJQh": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "MJWJoICJQh_8_0512052b",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.11040526009648224,
            "width": 0.6678445229681979,
            "height": 0.5601092896174864
          }
        }
      ],
      "review_text": "Figure 4(d) BC and Figure 4(e) BC (SDDU) have reward curves that look identical.",
      "category": "figure-only",
      "description": "Figure 4(d) and Figure 4(e) show partially same lines, even though they represent different methods.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"curves\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should differ\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"are identical\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"x-axis ranges\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"are inconsistent\"}}",
            "{\"letter\":\"C\",\"attribute\":\"legend colors\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"are swapped\"}}",
            "{\"letter\":\"A\",\"attribute\":\"shaded regions\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"absent in Figure 4(e)\"}}"
          ],
          "letters": ["B", "D", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"curves\",\"target\":\"figure_4d\",\"other_involved\":\"figure_4e, legend\",\"action\":\"modify\",\"edit_statement\":\"method configuration\",\"reason\":\"identical\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"x-axis ranges\",\"target\":\"figure_4d\",\"other_involved\":\"figure_4e\",\"action\":\"modify\",\"edit_statement\":\"align training episodes\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"legend colors\",\"target\":\"figure_4d\",\"other_involved\":\"figure_4e\",\"action\":\"swap\",\"edit_statement\":\"BC method colors\",\"reason\":\"swapped\"}",
            "{\"letter\":\"A\",\"attribute\":\"shaded regions\",\"target\":\"figure_4e\",\"other_involved\":\"figure_4d\",\"action\":\"add\",\"edit_statement\":\"standard deviation\",\"reason\":\"absent\"}"
          ],
          "letters": ["B", "D", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "M8xtZuxqC5": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 16,
          "image_id": "M8xtZuxqC5_16_3cd642ca",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.314025545380806,
            "width": 0.666077738515901,
            "height": 0.2336065573770492
          }
        },
        {
          "type": "image",
          "page": 4,
          "image_id": "M8xtZuxqC5_4_16fbd6ca",
          "bbox": {
            "x": 0.4909894424276722,
            "y": 0.3844490259722934,
            "width": 0.34628975265017664,
            "height": 0.23224043715846995
          }
        }
      ],
      "review_text": "Figure 3: The explanation regarding the conversion of 1-10 scores to percentages is missing, leading to a discrepancy between the Appendix and the figure.",
      "category": "figure-figure",
      "description": "Figure 3 shows PM as a percentage, but Prompt 1 in the appendix shows it is a value from 1-10 assigned by an LLM-as-a-judge",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first figure consistent with the content of the second figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first figure inconsistent with the content of the second figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "M8xtZuxqC5_16_3cd642ca",
          "correct": "M8xtZuxqC5_4_16fbd6ca",
          "incorrect": [
            "M8xtZuxqC5_4_image_figure4",
            "M8xtZuxqC5_5_image_figure5",
            "M8xtZuxqC5_2_image_figure2"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"rating scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent scale\"},\"evidence\":{\"source\":\"prompt_1_and_figure_3\",\"statement\":\"inconsistent scale\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"evaluation factors\",\"claim\":{\"source\":\"prompt_1\",\"statement\":\"multiple factors\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"no factor details\"}}",
            "{\"letter\":\"A\",\"attribute\":\"variation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should vary\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"consistently high\"}}",
            "{\"letter\":\"C\",\"attribute\":\"scope\",\"claim\":{\"source\":\"prompt_1\",\"statement\":\"RM evaluation\"},\"evidence\":{\"source\":\"figure_3_title\",\"statement\":\"PM and RM quality\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"Q_PM\",\"target\":\"figure_3\",\"other_involved\":\"Prompt 1\",\"action\":\"add\",\"edit_statement\":\"percentage derivation\",\"reason\":\"unexplained\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"evaluation factors\",\"target\":\"figure_3\",\"other_involved\":\"Prompt 1\",\"action\":\"add\",\"edit_statement\":\"details contribution\",\"reason\":\"missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"Q_PM Q_RM\",\"target\":\"figure_3\",\"other_involved\":\"Prompt 1\",\"action\":\"modify\",\"edit_statement\":\"align consistency\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"scope\",\"target\":\"figure_3 title\",\"other_involved\":\"Prompt 1\",\"action\":\"modify\",\"edit_statement\":\"align evaluation focus\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Prompt 1", "Figure 3"]
    }
  ],
  "M7CblLwJB8": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "M7CblLwJB8_7_8ef83bff",
          "bbox": {
            "x": 0.1659011032050574,
            "y": 0.09872494890390199,
            "width": 0.6696113074204946,
            "height": 0.2581967213114754
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "We verify AutoCustomization through human evaluation and show that it outperforms existing prompting techniques while being simpler",
          "line": 25
        }
      ],
      "review_text": "Figure 4(a): The figure shows that the prompting method outperforms the proposed method, contradicting the paper's claim of superior performance.",
      "category": "figure-text",
      "description": "Figure 4 (Left) shows prompting to have lower bias than the proposed method AutoCustomization, contradicting the claim in the abstract.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "We verify AutoCustomization through human evaluation and show that it outperforms existing prompting techniques while being simpler",
          "correct": "M7CblLwJB8_7_8ef83bff",
          "incorrect": [
            "M7CblLwJB8_7_image_figure5",
            "M7CblLwJB8_7_image_figure6",
            "M7CblLwJB8_4_image_figure3"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"bias comparison\",\"claim\":{\"source\":\"text\",\"statement\":\"AutoCustomization outperforms\"},\"evidence\":{\"source\":\"figure_4_left\",\"statement\":\"Prompt Engineering lower bias\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"bias of Prompt Engineering\",\"claim\":{\"source\":\"expectation\",\"statement\":\"not negative\"},\"evidence\":{\"source\":\"figure_4_left\",\"statement\":\"negative\"}}",
            "{\"letter\":\"C\",\"attribute\":\"complexity\",\"claim\":{\"source\":\"expectation\",\"statement\":\"lower bias -> simpler\"},\"evidence\":{\"source\":\"figure_4_left\",\"statement\":\"AutoCustomization higher bias -> more complex\"}}",
            "{\"letter\":\"D\",\"attribute\":\"bias comparison\",\"claim\":{\"source\":\"expectation\",\"statement\":\"AutoCustomization superior\"},\"evidence\":{\"source\":\"figure_4_right\",\"statement\":\"AutoCustomization lower bias\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"bias\",\"target\":\"figure_4_(left)\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align Prompt Engineering bias\",\"reason\":\"contradicts claim\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"bias\",\"target\":\"figure_4_(left)\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"correct negative bias\",\"reason\":\"impossible\"}",
            "{\"letter\":\"C\",\"attribute\":\"bias\",\"target\":\"figure_4_(left)\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align AutoCustomization bias\",\"reason\":\"implies complexity\"}",
            "{\"letter\":\"D\",\"attribute\":\"bias\",\"target\":\"figure_4_(right)\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align AutoCustomization bias\",\"reason\":\"contradicts claim\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "LieTse3fQB": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "LieTse3fQB_10_2ec45347",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.09735882868532275,
            "width": 0.6696113074204946,
            "height": 0.22814207650273227
          }
        },
        {
          "type": "image",
          "page": 10,
          "image_id": "LieTse3fQB_10_e107af0b",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.32809656695589995,
            "width": 0.6607773851590105,
            "height": 0.13797814207650275
          }
        }
      ],
      "review_text": "Figure 8: The qualitative results are shown at 5K iterations instead of 30K iterations, which contradicts the ablation study in Table 4.",
      "category": "figure-table",
      "description": "The ablation study quantitative results in Figure 8 are presented after 5000 iterations, but the qualitative results are presented after 30000 iterations.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "LieTse3fQB_10_2ec45347",
          "correct": "LieTse3fQB_10_e107af0b",
          "incorrect": [
            "LieTse3fQB_5_table_table1",
            "LieTse3fQB_9_image_figure8",
            "LieTse3fQB_8_image_figure6"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"training iterations\",\"claim\":{\"source\":\"Figure 8\",\"statement\":\"5,000 iterations\"},\"evidence\":{\"source\":\"Table 4\",\"statement\":\"30,000 iterations\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"content\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"Figure 8 and Table 4\",\"statement\":\"does not match\"}}",
            "{\"letter\":\"B\",\"attribute\":\"type of results\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 8 and Table 4\",\"statement\":\"not consistent\"}}",
            "{\"letter\":\"D\",\"attribute\":\"model comparison\",\"claim\":{\"source\":\"Figure 8\",\"statement\":\"significant improvement\"},\"evidence\":{\"source\":\"Table 4\",\"statement\":\"marginal improvement\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"iterations\",\"target\":\"figure_8\",\"other_involved\":\"table_4\",\"action\":\"modify\",\"edit_statement\":\"align iteration count\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"scene\",\"target\":\"figure_8\",\"other_involved\":\"table_4\",\"action\":\"add\",\"edit_statement\":\"add garden scene\",\"reason\":\"omitted\"}",
            "{\"letter\":\"B\",\"attribute\":\"metrics\",\"target\":\"figure_8\",\"other_involved\":\"table_4\",\"action\":\"add\",\"edit_statement\":\"add numerical metrics\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"performance\",\"target\":\"figure_8\",\"other_involved\":\"table_4\",\"action\":\"modify\",\"edit_statement\":\"align performance inference\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 8", "Table 4"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "LieTse3fQB_7_8fbc2e19",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.40787793769211067,
            "width": 0.6625441696113074,
            "height": 0.17759562841530055
          }
        },
        {
          "type": "image",
          "page": 8,
          "image_id": "LieTse3fQB_8_e8c5c9a6",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.3302823259530824,
            "width": 0.6607773851590105,
            "height": 0.20491803278688525
          }
        }
      ],
      "review_text": "Table 2 and Table 3: why there are two different quantitative evaluations on Mip-NeRF 360 datasets?",
      "category": "table-table",
      "description": "There are two tables that show quantitative comparison with baselines on the Mip-NeRF 360 Dataset, but the datapoints that appear in both tables are not consistent",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "LieTse3fQB_7_8fbc2e19",
          "correct": "LieTse3fQB_8_e8c5c9a6",
          "incorrect": [
            "LieTse3fQB_9_table_table4",
            "LieTse3fQB_5_table_table1",
            "LieTse3fQB_8_image_figure6"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"metric values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent metric values\"},\"evidence\":{\"source\":\"Table 2 and Table 3\",\"statement\":\"different metric values\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"resolution\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent resolution\"},\"evidence\":{\"source\":\"Table 2 and Table 3\",\"statement\":\"different resolution\"}}",
            "{\"letter\":\"A\",\"attribute\":\"highlighting\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent highlighting\"},\"evidence\":{\"source\":\"Table 2 and Table 3\",\"statement\":\"inconsistent highlighting\"}}",
            "{\"letter\":\"C\",\"attribute\":\"baseline models\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent baseline models\"},\"evidence\":{\"source\":\"Table 2 and Table 3\",\"statement\":\"different baseline models\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"metric values\",\"target\":\"table_2\",\"other_involved\":\"table_3\",\"action\":\"modify\",\"edit_statement\":\"align values for common models\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"breakdown\",\"target\":\"table_2\",\"other_involved\":\"table_3\",\"action\":\"modify\",\"edit_statement\":\"align result display\",\"reason\":\"different\"}",
            "{\"letter\":\"A\",\"attribute\":\"highlighting\",\"target\":\"table_2\",\"other_involved\":\"table_3\",\"action\":\"modify\",\"edit_statement\":\"align best performance\",\"reason\":\"unclear\"}",
            "{\"letter\":\"C\",\"attribute\":\"models\",\"target\":\"table_3\",\"other_involved\":\"table_2\",\"action\":\"add\",\"edit_statement\":\"include baseline models\",\"reason\":\"absent\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2", "Table 3"]
    }
  ],
  "LSB2mRJdgZ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "LSB2mRJdgZ_2_2edd0fd4",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.09599270846674353,
            "width": 0.6607773851590105,
            "height": 0.30464480874316946
          }
        }
      ],
      "review_text": "Figure 1: The reviewer points out an inconsistency in the representation of physical concepts in the abstract grid. The light red bar in the middle column should also be falling, but it is not.",
      "category": "figure-only",
      "description": "The grid world should show the concept of gravity, but the red bar in the middle of the three images is not falling down.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"red bar\",\"claim\":{\"source\":\"expectation\",\"statement\":\"falls\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"does not fall\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"green blocks\",\"claim\":{\"source\":\"expectation\",\"statement\":\"settle at bottom\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"above black grid\"}}",
            "{\"letter\":\"D\",\"attribute\":\"orange blocks\",\"claim\":{\"source\":\"expectation\",\"statement\":\"static base\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"change shape\"}}",
            "{\"letter\":\"C\",\"attribute\":\"grid examples\",\"claim\":{\"source\":\"expectation\",\"statement\":\"show orbital motion\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"only fall downwards\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"red bar\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"show fall\",\"reason\":\"does not align with laws of gravity\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"green blocks\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"show complete fall\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"D\",\"attribute\":\"orange blocks\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"maintain shape\",\"reason\":\"deforms\"}",
            "{\"letter\":\"C\",\"attribute\":\"movement\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"show orbital motion\",\"reason\":\"not shown\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "Kz10l3roV0": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "Kz10l3roV0_7_ce08980b",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.13759108579875343,
            "width": 0.666077738515901,
            "height": 0.2363387978142077
          }
        },
        {
          "type": "image",
          "page": 7,
          "image_id": "Kz10l3roV0_7_f049f376",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.6731102114818135,
            "width": 0.6643109540636042,
            "height": 0.21584699453551914
          }
        }
      ],
      "review_text": "Table 2: The results of removing the channel module for traffic and electricity datasets differ from those in Table 4.",
      "category": "table-table",
      "description": "The MSE values for train and electricity for the values without the channel module are different between the tables",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Kz10l3roV0_7_ce08980b",
          "correct": "Kz10l3roV0_7_f049f376",
          "incorrect": [
            "Kz10l3roV0_6_table_table2",
            "Kz10l3roV0_5_table_table1",
            "Kz10l3roV0_4_interline-equation_equation13.5"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"MSE values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent\"},\"evidence\":{\"source\":\"Table 2 and Table 4\",\"statement\":\"inconsistent for Traffic and Electricity\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"MSE values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent\"},\"evidence\":{\"source\":\"Table 2 and Table 4\",\"statement\":\"inconsistent for Traffic\"}}",
            "{\"letter\":\"D\",\"attribute\":\"MAE values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"present for all variants\"},\"evidence\":{\"source\":\"Table 2 and Table 4\",\"statement\":\"absent for w/o Cross-Stage\"}}",
            "{\"letter\":\"C\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should highlight best result\"},\"evidence\":{\"source\":\"Table 2 and Table 4\",\"statement\":\"not always highlighting best\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"MSE values\",\"target\":\"Table 2\",\"other_involved\":\"Table 4\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"MSE values\",\"target\":\"Table 2\",\"other_involved\":\"Table 4\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"discrepancy\"}",
            "{\"letter\":\"D\",\"attribute\":\"MAE values\",\"target\":\"Table 4\",\"other_involved\":\"Table 2\",\"action\":\"add\",\"edit_statement\":\"missing variant\",\"reason\":\"absent\"}",
            "{\"letter\":\"C\",\"attribute\":\"bolding\",\"target\":\"Table 2\",\"other_involved\":\"Table 4\",\"action\":\"modify\",\"edit_statement\":\"consistent bolding\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2", "Table 4"]
    }
  ],
  "KJkbmBcZRx": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 15,
          "image_id": "KJkbmBcZRx_15_75e3df3c",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.6517303967085042,
            "width": 0.6607773851590105,
            "height": 0.18306010928961752
          }
        }
      ],
      "review_text": "Table 9: The '#Params (M)' values are the same with and without PointHDMAE, which is unexpected as adding a component usually increases the number of parameters.",
      "category": "figure-only",
      "description": "Despite adding a component to a method in the second group of the table, the parameters do not increase.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"parameter count\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be different\"},\"evidence\":{\"source\":\"table_9\",\"statement\":\"same as base method\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should improve\"},\"evidence\":{\"source\":\"table_9\",\"statement\":\"lower for PointHDMAE\"}}",
            "{\"letter\":\"B\",\"attribute\":\"reference\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be different\"},\"evidence\":{\"source\":\"table_9\",\"statement\":\"same as base method\"}}",
            "{\"letter\":\"D\",\"attribute\":\"parameter count\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be same\"},\"evidence\":{\"source\":\"table_9\",\"statement\":\"varies\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"Parameter counts\",\"target\":\"table_9\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"update #Params (M) values\",\"reason\":\"identical values for different component counts\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"Accuracy\",\"target\":\"table_9\",\"other_involved\":\"PointHDMAE methods\",\"action\":\"modify\",\"edit_statement\":\"update classification metrics\",\"reason\":\"lower values\"}",
            "{\"letter\":\"B\",\"attribute\":\"Reference\",\"target\":\"table_9\",\"other_involved\":\"PointHDMAE-integrated methods\",\"action\":\"add\",\"edit_statement\":\"missing citation\",\"reason\":\"missing citation\"}",
            "{\"letter\":\"D\",\"attribute\":\"Experiment setup\",\"target\":\"table_9\",\"other_involved\":\"base methods\",\"action\":\"modify\",\"edit_statement\":\"ensure controlled setup\",\"reason\":\"uncontrolled setup\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 9"]
    }
  ],
  "JwNQP2dNhD": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "JwNQP2dNhD_2_95674c30",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.3110200642236595,
            "width": 0.6607773851590105,
            "height": 0.30464480874316946
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "After generating the program, we need oracles to execute the generated programs and\ndetermine whether they trigger bugs in the libraries based on the execution results. Similar to (Deng\net al., 2023), we employ two types of oracles: the crash oracle and the consistency oracle. The crash\noracle detects whether a crash is triggered during program execution, which is the most severe type\nof bug. The consistency oracle checks whether the program produces inconsistent results across\ndifferent backends, such as CPU and GPU",
          "line": 343
        }
      ],
      "review_text": "Figure 1: The figure shows only one oracle, but the text states that two types of oracles are employed.",
      "category": "figure-text",
      "description": "The text outlines the use of 2 oracles, but the figure depicting the architecture only shows 1 oracle.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "After generating the program, we need oracles to execute the generated programs and\ndetermine whether they trigger bugs in the libraries based on the execution results. Similar to (Deng\net al., 2023), we employ two types of oracles: the crash oracle and the consistency oracle. The crash\noracle detects whether a crash is triggered during program execution, which is the most severe type\nof bug. The consistency oracle checks whether the program produces inconsistent results across\ndifferent backends, such as CPU and GPU",
          "correct": "JwNQP2dNhD_2_95674c30",
          "incorrect": [
            "JwNQP2dNhD_8_image_figure3",
            "JwNQP2dNhD_8_image_figure2",
            "JwNQP2dNhD_9_image_figure4"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"oracle types\",\"claim\":{\"source\":\"text\",\"statement\":\"employs two oracles\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"only depicts one Oracle\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"figure content\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should show both oracles\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"only shows crash oracle\"}}",
            "{\"letter\":\"D\",\"attribute\":\"bug detection\",\"claim\":{\"source\":\"text\",\"statement\":\"oracles detect bugs from executed programs\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"Oracle generates Potential Bugs\"}}",
            "{\"letter\":\"B\",\"attribute\":\"backend comparison\",\"claim\":{\"source\":\"text\",\"statement\":\"compares different backends\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"compares CPU and GPU\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"oracles count\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"second oracle\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"detection process\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"consistency oracle details\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"bug generation\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update bug source\",\"reason\":\"contradicts\"}",
            "{\"letter\":\"B\",\"attribute\":\"oracle comparison\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update backend types\",\"reason\":\"limited\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 1"]
    }
  ],
  "JnWJbrnaUE": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "JnWJbrnaUE_7_d538d2a2",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.11142985026041669,
            "width": 0.6643109540636042,
            "height": 0.4767759562841531
          }
        },
        {
          "type": "text",
          "page": 8,
          "content": "First, the proposed method can significantly improve the performance of RAG and Self-RAG.\nSpecifically, as shown in table 1, CRAG outperformed RAG by margins of 19.0% accuracy on\nPopQA, 14.9% FactScore on Biography, 36.6% accuracy on PubHealth, and 8.1% accuracy on\nArc-Challenge when based on SelfRAG-LLaMA2-7b, as well as by margins of 9.6% accuracy on\nPopQA, 2.8% FactScore on Biography, and 2.0% on Arc-Challenge when based on LLaMA2-hf-7b",
          "line": 384
        }
      ],
      "review_text": "Section 5.3: The accuracy improvements of CRAG over RAG do not match the data in Table 1. Are these typos or wrong results?",
      "category": "table-text",
      "description": "The performance improvements claimed in the text are not the same as shown in the Table.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "First, the proposed method can significantly improve the performance of RAG and Self-RAG.\nSpecifically, as shown in table 1, CRAG outperformed RAG by margins of 19.0% accuracy on\nPopQA, 14.9% FactScore on Biography, 36.6% accuracy on PubHealth, and 8.1% accuracy on\nArc-Challenge when based on SelfRAG-LLaMA2-7b, as well as by margins of 9.6% accuracy on\nPopQA, 2.8% FactScore on Biography, and 2.0% on Arc-Challenge when based on LLaMA2-hf-7b",
          "correct": "JnWJbrnaUE_7_d538d2a2",
          "incorrect": [
            "JnWJbrnaUE_7_table_table2",
            "JnWJbrnaUE_8_table_table4",
            "JnWJbrnaUE_8_table_table3"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"improvement\",\"claim\":{\"source\":\"text\",\"statement\":\"19.0% improvement\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"lower margin\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"improvement\",\"claim\":{\"source\":\"text\",\"statement\":\"19.0% accuracy improvement\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"higher improvement\"}}",
            "{\"letter\":\"D\",\"attribute\":\"improvement\",\"claim\":{\"source\":\"text\",\"statement\":\"14.9% improvement\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"lower margin\"}}",
            "{\"letter\":\"B\",\"attribute\":\"improvement\",\"claim\":{\"source\":\"text\",\"statement\":\"36.6% improvement\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"lower margin\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"improvement percentage\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"accuracy improvement\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"improvement percentage\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"improvement percentage\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "Jl0aEFrp11": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "Jl0aEFrp11_10_f7feebaf",
          "bbox": {
            "x": 0.17650180991883832,
            "y": 0.09845170818391395,
            "width": 0.32862190812720843,
            "height": 0.2144808743169399
          }
        }
      ],
      "review_text": "Figure 1,2,3 and 4: The figures in the experimental section are incomplete. For example, Figure 1,2,3 and 4 contain only three and six lines, whereas the compared algorithms are total four and six, respectively.",
      "category": "figure-only",
      "description": "The legend in the left part of the figure shows 4 methods, but only 3 lines are discernible. In the right part of the figure, the legend shows 6 methods, but only 5 lines are  discernible.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"number of methods and lines\",\"claim\":{\"source\":\"legend\",\"statement\":\"4 methods, 3 lines and 6 methods, 5 lines\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"left 4 methods, 3 lines; right 6 methods, 5 lines\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"number of methods and lines\",\"claim\":{\"source\":\"legend\",\"statement\":\"4 methods, 4 lines and 6 methods, 5 lines\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"left 4 methods, 4 lines; right 6 methods, 5 lines\"}}",
            "{\"letter\":\"B\",\"attribute\":\"number of lines\",\"claim\":{\"source\":\"expectation\",\"statement\":\"lines match legend\"},\"evidence\":{\"source\":\"all plots\",\"statement\":\"more lines than methods\"}}",
            "{\"letter\":\"A\",\"attribute\":\"number of lines\",\"claim\":{\"source\":\"expectation\",\"statement\":\"all plots match legend\"},\"evidence\":{\"source\":\"top-left plot\",\"statement\":\"one line missing\"}}"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"lines\",\"target\":\"figure_1\",\"other_involved\":\"legend\",\"action\":\"modify\",\"edit_statement\":\"number of lines\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"lines\",\"target\":\"figure_1\",\"other_involved\":\"legend\",\"action\":\"add\",\"edit_statement\":\"line\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"lines\",\"target\":\"figure_1\",\"other_involved\":\"legend\",\"action\":\"remove\",\"edit_statement\":\"extra lines\",\"reason\":\"exceeds\"}",
            "{\"letter\":\"A\",\"attribute\":\"lines\",\"target\":\"figure_1\",\"other_involved\":\"legend\",\"action\":\"add\",\"edit_statement\":\"line\",\"reason\":\"missing\"}"
          ],
          "letters": ["C", "D", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "JfKF7Pdigi": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "JfKF7Pdigi_10_a767d405",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.19722226669228146,
            "width": 0.6713780918727914,
            "height": 0.20081967213114757
          }
        },
        {
          "type": "text",
          "page": 10,
          "content": "To further elucidate the effects of dynamic information integration, we analyze activation values\nwithin different brain networks before and after the dynamics injection process (Fig. 6b). After\nthe injection, we observe increased activation in higher cognitive networks and higher activity in the visual cortex.",
          "line": 520
        }
      ],
      "review_text": "Line 523: The authors state 'After the injection, we observe increased activation in higher cognitive networks, while activity in the visual cortex decreases.' However, Figure 6(b) shows a decrease in activation according to the legend, contradicting the authors' report.",
      "category": "figure-text",
      "description": "The text states higher activity, but Figure 6(b) shows an overall decrease in activity.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "To further elucidate the effects of dynamic information integration, we analyze activation values\nwithin different brain networks before and after the dynamics injection process (Fig. 6b). After\nthe injection, we observe increased activation in higher cognitive networks and higher activity in the visual cortex.",
          "correct": "JfKF7Pdigi_10_a767d405",
          "incorrect": [
            "JfKF7Pdigi_8_image_figure4",
            "JfKF7Pdigi_7_image_figure3",
            "JfKF7Pdigi_15_image_figure7"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"brain activity\",\"claim\":{\"source\":\"text\",\"statement\":\"higher activity and increased activation\"},\"evidence\":{\"source\":\"Figure 6(b)\",\"statement\":\"reduction in activated regions\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"verification\",\"claim\":{\"source\":\"text\",\"statement\":\"activity increased\"},\"evidence\":{\"source\":\"Figure 6(b)\",\"statement\":\"can't verify\"}}",
            "{\"letter\":\"B\",\"attribute\":\"brain activity\",\"claim\":{\"source\":\"text\",\"statement\":\"overall activity increased\"},\"evidence\":{\"source\":\"Figure\",\"statement\":\"solely activated regions change\"}}",
            "{\"letter\":\"D\",\"attribute\":\"color scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"red indicates higher activation\"},\"evidence\":{\"source\":\"Figure 6(b)\",\"statement\":\"red indicates lower activation\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"activity depiction\",\"target\":\"figure_6b\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"match activity increase\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"brains\",\"target\":\"figure_6b\",\"other_involved\":\"text\",\"action\":\"replace\",\"edit_statement\":\"verify claim\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"brain activity\",\"target\":\"text\",\"other_involved\":\"figure_6b\",\"action\":\"modify\",\"edit_statement\":\"reflect regions change\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"color scale\",\"target\":\"figure_6b\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"match activity information\",\"reason\":\"misleading\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "JQbqaQjV7D": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "JQbqaQjV7D_8_80d621d9",
          "bbox": {
            "x": 0.1659011032050574,
            "y": 0.10862932048860145,
            "width": 0.6678445229681979,
            "height": 0.23497267759562843
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "Due to token insertion limitations in LLMs, we conducted a total of 165 samples per model (11 temperature settings from 0.0 to 1.0 * 10 temporal categories + 11 temperature settings * 5 spatial scenarios), comparing results across 9 different LLM models (as shown in Table 3).",
          "line": 338
        }
      ],
      "review_text": "Table 3: The text (line 339) suggests 15 (10 + 5) questions were experimented on, but the table shows results for only 14 questions.",
      "category": "figure-text",
      "description": "The text claims Table 3 compares 9 different models, but the table shows 10 (9 models + 1 RAG variant)",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Due to token insertion limitations in LLMs, we conducted a total of 165 samples per model (11 temperature settings from 0.0 to 1.0 * 10 temporal categories + 11 temperature settings * 5 spatial scenarios), comparing results across 9 different LLM models (as shown in Table 3).",
          "correct": "JQbqaQjV7D_8_80d621d9",
          "incorrect": [
            "JQbqaQjV7D_8_table_table5",
            "JQbqaQjV7D_7_table_table4",
            "JQbqaQjV7D_9_table_table6"
          ],
          "letters": ["B", "C", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"model count\",\"claim\":{\"source\":\"text\",\"statement\":\"9 models\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"10 models\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"model type\",\"claim\":{\"source\":\"text\",\"statement\":\"LLM models\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"non-LLM models listed\"}}",
            "{\"letter\":\"B\",\"attribute\":\"parameters\",\"claim\":{\"source\":\"text\",\"statement\":\"temporal settings\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"no temporal settings\"}}",
            "{\"letter\":\"D\",\"attribute\":\"symbols\",\"claim\":{\"source\":\"caption\",\"statement\":\"~, x, checkmark\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"other symbols\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"number of models\",\"target\":\"table_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align model count\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"models\",\"target\":\"table_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align model types\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"temporal settings\",\"target\":\"table_3\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add experimental parameters\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"symbols\",\"target\":\"table_3\",\"other_involved\":\"caption_table_3\",\"action\":\"add\",\"edit_statement\":\"add symbol descriptions\",\"reason\":\"incomplete\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3"]
    }
  ],
  "JIlIYIHMuv": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "JIlIYIHMuv_6_a90db3d3",
          "bbox": {
            "x": 0.4097173576220185,
            "y": 0.6576730696881403,
            "width": 0.4187279151943462,
            "height": 0.03278688524590164
          }
        },
        {
          "type": "image",
          "page": 5,
          "image_id": "JIlIYIHMuv_5_a092bf86",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.10521401994215335,
            "width": 0.6625441696113074,
            "height": 0.4234972677595629
          }
        }
      ],
      "review_text": "Fig. 2: To my understanding, E^q and E^v are two different encoders. However, Eq. (1) shows them both sharing the same encoding function *Enc()*. I would suggest modifying this equation and/or the definition of *Enc()* accordingly.",
      "category": "figure-equation",
      "description": "Equation (1) uses the same encoding function for the text and vision encoders, but Figure 2 shows the two encoders as separate modules",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the equation?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the equation?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "JIlIYIHMuv_6_a90db3d3",
          "correct": "JIlIYIHMuv_5_a092bf86",
          "incorrect": [
            "JIlIYIHMuv_1_image_figure1",
            "JIlIYIHMuv_13_image_figure3",
            "JIlIYIHMuv_5_interline-equation_equation47.5"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"encoder\",\"claim\":{\"source\":\"equation_1\",\"statement\":\"single encoding function\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"separate encoder modules\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"inputs\",\"claim\":{\"source\":\"figure_2\",\"statement\":\"inputs to LLM\"},\"evidence\":{\"source\":\"equation_1\",\"statement\":\"outputs of Enc\"}}",
            "{\"letter\":\"C\",\"attribute\":\"projection\",\"claim\":{\"source\":\"equation_1\",\"statement\":\"no visual projection\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"projection module present\"}}",
            "{\"letter\":\"D\",\"attribute\":\"encoder output\",\"claim\":{\"source\":\"equation_1\",\"statement\":\"results shown\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"results not shown\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"encoding function\",\"target\":\"equation_1\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"represent multiple functions\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Q^TS and Z_V\",\"target\":\"equation_1\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"indicate as inputs\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"Projection w module\",\"target\":\"equation_1\",\"other_involved\":\"figure_2\",\"action\":\"add\",\"edit_statement\":\"add projection module\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"encoder results\",\"target\":\"figure_2\",\"other_involved\":\"equation_1\",\"action\":\"add\",\"edit_statement\":\"add encoder results\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["(1)", "Figure 2"]
    }
  ],
  "JDa5RiTIC7": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "JDa5RiTIC7_4_16bb29e3",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.091552817756361,
            "width": 0.6678445229681979,
            "height": 0.45218579234972683
          }
        }
      ],
      "review_text": "Figure 2: The Office products and Electronics categories in the search tree do not match with the arrows. Please update and change the score accordingly. The score does not match the example narrative.",
      "category": "figure-only",
      "description": "In the second column of the flow chart shown in the figure, the categories named in the blue text boxes do not match the screenshots shown below them",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"state changes\",\"claim\":{\"source\":\"expectation\",\"statement\":\"match content\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"do not match\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"path\",\"claim\":{\"source\":\"expectation\",\"statement\":\"leads to correct category\"},\"evidence\":{\"source\":\"path\",\"statement\":\"leads to incorrect category\"}}",
            "{\"letter\":\"A\",\"attribute\":\"trajectories\",\"claim\":{\"source\":\"expectation\",\"statement\":\"satisfy instruction\"},\"evidence\":{\"source\":\"trajectories\",\"statement\":\"not satisfied\"}}",
            "{\"letter\":\"C\",\"attribute\":\"probabilities\",\"claim\":{\"source\":\"expectation\",\"statement\":\"correctly calculated\"},\"evidence\":{\"source\":\"trajectories\",\"statement\":\"incorrectly calculated\"}}"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"state change description\",\"target\":\"blue text boxes\",\"other_involved\":\"screenshots\",\"action\":\"modify\",\"edit_statement\":\"match content\",\"reason\":\"does not match\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"'Click Office Products' path\",\"target\":\"Office Electronics sub-category\",\"other_involved\":\"Office Products view\",\"action\":\"modify\",\"edit_statement\":\"correct path\",\"reason\":\"bypasses view\"}",
            "{\"letter\":\"A\",\"attribute\":\"disk memory\",\"target\":\"simulated trajectories\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"update trajectories\",\"reason\":\"not satisfied\"}",
            "{\"letter\":\"C\",\"attribute\":\"probabilities\",\"target\":\"simulated trajectories\",\"other_involved\":\"displayed states\",\"action\":\"modify\",\"edit_statement\":\"correct calculation\",\"reason\":\"incorrectly calculated\"}"
          ],
          "letters": ["B", "D", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "J8LYjgi7nH": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "J8LYjgi7nH_6_7c9b943b",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.33595175821273054,
            "width": 0.6713780918727914,
            "height": 0.36065573770491804
          }
        }
      ],
      "review_text": "Algorithm 1: The notation $k$ appears in `kmeans_clustering` without prior definition.",
      "category": "algorithm-only",
      "description": "The variable 'k' first appears in the function kmeans_clustering() but was not defined before",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the algorithm that is consistent with a different part of the algorithm?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the algorithm that is inconsistent with a different part of the algorithm?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"k\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be defined\"},\"evidence\":{\"source\":\"Algorithm 1\",\"statement\":\"not defined\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Perf\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be parameter\"},\"evidence\":{\"source\":\"Algorithm 1\",\"statement\":\"not parameter\"}}",
            "{\"letter\":\"D\",\"attribute\":\"F_k\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be initialized\"},\"evidence\":{\"source\":\"Algorithm 1\",\"statement\":\"used before initialization\"}}",
            "{\"letter\":\"A\",\"attribute\":\"theta\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be updated\"},\"evidence\":{\"source\":\"Algorithm 1\",\"statement\":\"not updated\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"k variable\",\"target\":\"algorithm_1\",\"other_involved\":\"kmeans_clustering function\",\"action\":\"define\",\"edit_statement\":\"k variable\",\"reason\":\"undefined\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Perf function\",\"target\":\"algorithm_1\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"define parameter\",\"reason\":\"undefined\"}",
            "{\"letter\":\"D\",\"attribute\":\"F_k variable\",\"target\":\"algorithm_1\",\"other_involved\":\"kmeans_clustering function\",\"action\":\"initialize\",\"edit_statement\":\"F_k variable\",\"reason\":\"uninitialized\"}",
            "{\"letter\":\"A\",\"attribute\":\"theta\",\"target\":\"algorithm_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"update variable\",\"reason\":\"infinite loop\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Algorithm 1"]
    }
  ],
  "IUzQfdkkoL": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "IUzQfdkkoL_3_9f7b0cf8",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.09633423852138834,
            "width": 0.6607773851590105,
            "height": 0.16120218579234974
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "While numerous food classification datasets exist, ranging from the classic Food-101 dataset Bossard et al. (2014) to the latest Food2K dataset Min et al. (2023), datasets for portion estimation or macro-nutrient estimation are significantly fewer",
          "line": 126
        }
      ],
      "review_text": "Table 1: The authors referenced Food-101 in 'Related Work', but there is no comparison with it in Table 1.",
      "category": "table-text",
      "description": "The related work section talks about the Food-101 dataset, but it is not included in the comparison of public datasets in Table 1",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "While numerous food classification datasets exist, ranging from the classic Food-101 dataset Bossard et al. (2014) to the latest Food2K dataset Min et al. (2023), datasets for portion estimation or macro-nutrient estimation are significantly fewer",
          "correct": "IUzQfdkkoL_3_9f7b0cf8",
          "incorrect": [
            "IUzQfdkkoL_5_table_table2",
            "IUzQfdkkoL_7_table_figure4",
            "IUzQfdkkoL_21_table_table6"
          ],
          "letters": ["B", "C", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"classic Food-101 dataset\",\"claim\":{\"source\":\"expectation\",\"statement\":\"included\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not included\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"Food2K dataset classification\",\"claim\":{\"source\":\"expectation\",\"statement\":\"is 3D\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"classified as 2D\"}}",
            "{\"letter\":\"A\",\"attribute\":\"Generic 3D Dataset dataset\",\"claim\":{\"source\":\"Table 1\",\"statement\":\"is present\"},\"evidence\":{\"source\":\"text\",\"statement\":\"not referenced\"}}",
            "{\"letter\":\"C\",\"attribute\":\"datasets for portion and macro-nutrient estimation count\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be fewer\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"more\"}}"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"Food-101\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add dataset\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"Food2K dataset\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update type\",\"reason\":\"misclassified\"}",
            "{\"letter\":\"A\",\"attribute\":\"dataset\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add reference\",\"reason\":\"unreferenced\"}",
            "{\"letter\":\"C\",\"attribute\":\"dataset count\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update counts\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["B", "D", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "IUzQfdkkoL_5_7173c62e",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.09653918990671961,
            "width": 0.666077738515901,
            "height": 0.24316939890710385
          }
        }
      ],
      "review_text": "Figure 2: The images shown on the 3D Food Data Distribution have no correspondence with the food names on the abscissa. Please clarify this inconsistency.",
      "category": "figure-only",
      "description": "Figure 2 shows food images above a bar plot, but the images are unrelated to the food names indicated on the abscissa",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"image-label consistency\",\"claim\":{\"source\":\"expectation\",\"statement\":\"images match labels\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"images don't match labels\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"axis label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"y-axis is distinct items\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"y-axis is samples\"}}",
            "{\"letter\":\"B\",\"attribute\":\"legibility\",\"claim\":{\"source\":\"expectation\",\"statement\":\"labels are legible\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"labels are illegible\"}}",
            "{\"letter\":\"C\",\"attribute\":\"dimensionality\",\"claim\":{\"source\":\"expectation\",\"statement\":\"3D data is 3D\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"3D data is 2D\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"food images\",\"target\":\"figure_2\",\"other_involved\":\"x-axis labels\",\"action\":\"modify\",\"edit_statement\":\"match labels\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"y-axis label\",\"target\":\"figure_2\",\"other_involved\":\"y_axis\",\"action\":\"modify\",\"edit_statement\":\"clarify quantity\",\"reason\":\"unclear\"}",
            "{\"letter\":\"B\",\"attribute\":\"font size\",\"target\":\"figure_2\",\"other_involved\":\"x_axis labels\",\"action\":\"modify\",\"edit_statement\":\"improve legibility\",\"reason\":\"illegible\"}",
            "{\"letter\":\"C\",\"attribute\":\"chart type\",\"target\":\"figure_2\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"represent 3D data\",\"reason\":\"only 2D data represented\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "IQ0BBfbYR2": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "IQ0BBfbYR2_5_2af5e86d",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.33233150628095115,
            "width": 0.666077738515901,
            "height": 0.16530054644808745
          }
        }
      ],
      "review_text": "line 236 and other places: The notation for \u03b8 is not consistent. In some places, it goes from 0 to k, while in others, it is from 1 to k. More importantly, Eq. 7 makes it seem like \u03b8's denote a subset of indices, but they are supposed to be binary masks.",
      "category": "algorithm-only",
      "description": "The \u03b8 once goes from 0 to k and once from 1 to k.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the algorithm that is consistent with a different part of the algorithm?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the algorithm that is inconsistent with a different part of the algorithm?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"\u03b8 indexing\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Algorithm 1\",\"statement\":\"0-based then 1-based\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"\u03b8 indexing\",\"claim\":{\"source\":\"get_masks and cls_score\",\"statement\":\"1-based then 0-based\"},\"evidence\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"}}",
            "{\"letter\":\"B\",\"attribute\":\"class condition\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be used as constraint\"},\"evidence\":{\"source\":\"Algorithm\",\"statement\":\"not used as constraint\"}}",
            "{\"letter\":\"A\",\"attribute\":\"indexing\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"cls_score function\",\"statement\":\"references \u03b8 by t loop index\"}}"
          ],
          "letters": ["D", "C", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"indexing for \u03b8\",\"target\":\"get_masks\",\"other_involved\":\"cls_score\",\"action\":\"modify\",\"edit_statement\":\"match array access\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"indexing for \u03b8\",\"target\":\"get_masks\",\"other_involved\":\"cls_score\",\"action\":\"modify\",\"edit_statement\":\"match array access\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"B\",\"attribute\":\"class condition c\",\"target\":\"Algorithm 1\",\"other_involved\":\"constraint\",\"action\":\"add\",\"edit_statement\":\"use as constraint\",\"reason\":\"not used\"}",
            "{\"letter\":\"A\",\"attribute\":\"cls_score function\",\"target\":\"Algorithm 1\",\"other_involved\":\"\u03b8 values\",\"action\":\"modify\",\"edit_statement\":\"correctly reference\",\"reason\":\"unpredictable application\"}"
          ],
          "letters": ["D", "C", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Algorithm 1"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "IQ0BBfbYR2_7_09714439",
          "bbox": {
            "x": 0.17473502546654152,
            "y": 0.09858837023458845,
            "width": 0.6537102473498233,
            "height": 0.27868852459016397
          }
        },
        {
          "type": "text",
          "page": 6,
          "content": "As there exists no ground truth for counterfactual examples, a rough estimate regarding the quality\ncan only be assessed via quantifying desired properties as the minimality and the accuracy. We align\nour evaluation with Farid et al. (2023) and compute the FID score Heusel et al. (2017) as well as the\nL1 and L2 norm between the original and counterfactual image to measure their semantic and pixel-\nbased distance, denoting the minimality. The flip ratio (FR) determines the accuracy by measuring\nhow often the classifier predicts the counterfactual class for the generated sample.",
          "line": 292
        }
      ],
      "review_text": "line 295 mentions the L2 norm between the original and counterfactual image. It is not clear if this was supposed to be a metric in Table 1.",
      "category": "table-text",
      "description": "The text talks about L2 as a metric, but it is not shown in Table 1",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "As there exists no ground truth for counterfactual examples, a rough estimate regarding the quality\ncan only be assessed via quantifying desired properties as the minimality and the accuracy. We align\nour evaluation with Farid et al. (2023) and compute the FID score Heusel et al. (2017) as well as the\nL1 and L2 norm between the original and counterfactual image to measure their semantic and pixel-\nbased distance, denoting the minimality. The flip ratio (FR) determines the accuracy by measuring\nhow often the classifier predicts the counterfactual class for the generated sample.",
          "correct": "IQ0BBfbYR2_7_09714439",
          "incorrect": [
            "IQ0BBfbYR2_7_image_figure4",
            "IQ0BBfbYR2_7_image_figure5",
            "IQ0BBfbYR2_6_image_figure3"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"L2 norm\",\"claim\":{\"source\":\"text\",\"statement\":\"measure distance\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not present\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Flip Ratio\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be in table\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"not included\"}}",
            "{\"letter\":\"C\",\"attribute\":\"FID score\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be in table\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"absent\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Confidence\",\"claim\":{\"source\":\"expectation\",\"statement\":\"higher is better\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"lower is better\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"L2 norm\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add column\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Flip Ratio (FR)\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add values\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"FID score\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add metric\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"Confidence\",\"target\":\"table_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"correct ordering\",\"reason\":\"wrong\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "I86z54CL2y": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "I86z54CL2y_4_02a38b9e",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.0994080090131916,
            "width": 0.6590106007067138,
            "height": 0.4508196721311476
          }
        },
        {
          "type": "text",
          "page": 5,
          "content": "To facilitate the decoding process\nacross different planes, we introduce a learnable embedding u that supplies additional information for\ndecoupling new planes. The learnable embedding u is first processed through self-attention encoding\nand then used as a query in a cross-attention mechanism with the encoded image latent h.",
          "line": 220
        }
      ],
      "review_text": "Line 221: The authors mention a learnable embedding $u$ in the text, but this element is absent from Figure 2, creating confusion in the presentation.",
      "category": "figure-text",
      "description": "The text mentions a learnable embedding $u$, but it cannot be found in Figure 2, which shows the method visually",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "To facilitate the decoding process\nacross different planes, we introduce a learnable embedding u that supplies additional information for\ndecoupling new planes. The learnable embedding u is first processed through self-attention encoding\nand then used as a query in a cross-attention mechanism with the encoded image latent h.",
          "correct": "I86z54CL2y_4_02a38b9e",
          "incorrect": [
            "I86z54CL2y_4_image_figure3",
            "I86z54CL2y_6_image_figure4",
            "I86z54CL2y_8_image_figure5"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"parameter 'u'\",\"claim\":{\"source\":\"text\",\"statement\":\"is processed\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"not shown\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"viewing angle\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"do not match\"}}",
            "{\"letter\":\"B\",\"attribute\":\"latent h\",\"claim\":{\"source\":\"text\",\"statement\":\"is used\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"not shown\"}}",
            "{\"letter\":\"C\",\"attribute\":\"distribution\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be Gaussian\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"is Uniform\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"parameter 'u'\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"add parameter 'u'\",\"reason\":\"not shown\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"viewing angle\",\"target\":\"figure_2\",\"other_involved\":\"U-Net\",\"action\":\"modify\",\"edit_statement\":\"match angles\",\"reason\":\"not matched\"}",
            "{\"letter\":\"B\",\"attribute\":\"latent 'h'\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"add latent 'h'\",\"reason\":\"not shown\"}",
            "{\"letter\":\"C\",\"attribute\":\"Gaussian Divergent Significance\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"replace\",\"edit_statement\":\"replace Uniform with Gaussian\",\"reason\":\"uniform shown\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "HGxGCjqnDd": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "HGxGCjqnDd_4_03a9430a",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.0990664789585468,
            "width": 0.6643109540636042,
            "height": 0.24590163934426232
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "The matrices A \u2208 Rr\u00d7d2 and B \u2208 Rd1\u00d7r represents the learnable low-rank matrices with the rank r \u226a {d1, d2}. Typically, A adopts Kaiming uniform initialization (He et al., 2015) while B is initialized to zero at the start of the training process. Essentially, our approach centers on re-parameterizing the adaptation matrices, termed \u02dcA \u2208 Rr\u00d7d2 and \u02dcB \u2208 Rd1\u00d7r , as the spatial recovery of sparse spectral coefficients, while retaining LoRA\u2019s update schema",
          "line": 152
        }
      ],
      "review_text": "Figure 2: The stated dimensions of F and Atilde in the text (r1 by d) conflict with Figure 2, which states that F is d1 by r.",
      "category": "figure-text",
      "description": "The text shows LoRa and FoRa to have the same parameter r, but the Figure shows the parameters r for LoRa and $\\tilde r$ for FoRa.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "The matrices A \u2208 Rr\u00d7d2 and B \u2208 Rd1\u00d7r represents the learnable low-rank matrices with the rank r \u226a {d1, d2}. Typically, A adopts Kaiming uniform initialization (He et al., 2015) while B is initialized to zero at the start of the training process. Essentially, our approach centers on re-parameterizing the adaptation matrices, termed \u02dcA \u2208 Rr\u00d7d2 and \u02dcB \u2208 Rd1\u00d7r , as the spatial recovery of sparse spectral coefficients, while retaining LoRA\u2019s update schema",
          "correct": "HGxGCjqnDd_4_03a9430a",
          "incorrect": [
            "HGxGCjqnDd_1_image_figure1",
            "HGxGCjqnDd_8_image_figure3",
            "HGxGCjqnDd_3_interline-equation_equation24"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"rank parameter\",\"claim\":{\"source\":\"text\",\"statement\":\"same rank parameter\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"different rank parameters\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"trained elements\",\"claim\":{\"source\":\"text\",\"statement\":\"all elements trained\"},\"evidence\":{\"source\":\"Figure 2(a)\",\"statement\":\"B=0\"}}",
            "{\"letter\":\"B\",\"attribute\":\"latent space size\",\"claim\":{\"source\":\"expectation\",\"statement\":\"same latent space size\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"different latent space size\"}}",
            "{\"letter\":\"A\",\"attribute\":\"learnable matrices\",\"claim\":{\"source\":\"text\",\"statement\":\"A and B learnable\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"A and B Frozen\"}}"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"rank parameter\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"parameter label\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"training requirement\",\"target\":\"figure_2a\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"trainable elements\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"B\",\"attribute\":\"latent space\",\"target\":\"figure_2b\",\"other_involved\":\"figure_2a\",\"action\":\"modify\",\"edit_statement\":\"size\",\"reason\":\"different\"}",
            "{\"letter\":\"A\",\"attribute\":\"matrices\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"learnability status\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["C", "D", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "Gp6VU0oJX3": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "Gp6VU0oJX3_4_2cf3b6ef",
          "bbox": {
            "x": 0.2931095837704284,
            "y": 0.5397769115010246,
            "width": 0.40989399293286216,
            "height": 0.1475409836065574
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "For high-dimensional data X and its prediction\ntarget Y , the latent attribute set A between X and Y can be divided into the causally invariant\nattribute set C and variation attribute set V . Attributes belonging to C should satisfy P(Y |C) and\nP(X|C) being invariant across domains. Attributes belonging to V should satisfy that P(Y |V ) or\nP(X|V ) varies across domains.",
          "line": 144
        }
      ],
      "review_text": "Figure 2: The two frameworks show that both C and V are parents of X, which contradicts the assumption that P(X|C) is invariant across domains, as it should also depend on V.",
      "category": "figure-text",
      "description": "The text states P(X|C) should be invariant and P(X|V) varies, but looking at the Figure, X has parents C and V, so P(X|C) can't be invariant as it is also dependent on V.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "For high-dimensional data X and its prediction\ntarget Y , the latent attribute set A between X and Y can be divided into the causally invariant\nattribute set C and variation attribute set V . Attributes belonging to C should satisfy P(Y |C) and\nP(X|C) being invariant across domains. Attributes belonging to V should satisfy that P(Y |V ) or\nP(X|V ) varies across domains.",
          "correct": "Gp6VU0oJX3_4_2cf3b6ef",
          "incorrect": [
            "Gp6VU0oJX3_0_image_figure1",
            "Gp6VU0oJX3_8_image_figure3",
            "Gp6VU0oJX3_9_image_figure4"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"parents of X\",\"claim\":{\"source\":\"expectation\",\"statement\":\"P(X|C) is invariant\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"X has parents C and V\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"P(X|V)\",\"claim\":{\"source\":\"expectation\",\"statement\":\"varies across domains\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"V causes X\"}}",
            "{\"letter\":\"A\",\"attribute\":\"parents of Y\",\"claim\":{\"source\":\"text\",\"statement\":\"P(Y|C) is invariant\"},\"evidence\":{\"source\":\"Figure 2 (b) PICIM\",\"statement\":\"Y has parents C and V\"}}",
            "{\"letter\":\"D\",\"attribute\":\"V\",\"claim\":{\"source\":\"expectation\",\"statement\":\"V varies\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"V is connected to C\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"P(X|C) invariance\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"assert X dependence\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"P(X|V) variation\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"align causal link\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"A\",\"attribute\":\"parents C and V\",\"target\":\"figure_2b\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align parents\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"V definition\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"align V connection\",\"reason\":\"implication\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 2"]
    }
  ],
  "Gh1XW314zF": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "Gh1XW314zF_7_f4d9bea0",
          "bbox": {
            "x": 0.17296824101424468,
            "y": 0.0924408292509819,
            "width": 0.6678445229681979,
            "height": 0.2363387978142077
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "MG-LLM achieves the highest combined performance across both accuracy and F1 score, with an\naccuracy of 78.8% and an F1 score of 64.6",
          "line": 347
        }
      ],
      "review_text": "Section 4.2: The highest F1 score is stated to be achieved by the model from Rezk et al. (2024), but Table 1 shows that the model from Kim et al. (2024) has the highest F1 score.",
      "category": "table-text",
      "description": "The text claims MG-LLM to have the highest F1 score, but the Table shows (Rezk et al.) to have a higher F1 score.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "MG-LLM achieves the highest combined performance across both accuracy and F1 score, with an\naccuracy of 78.8% and an F1 score of 64.6",
          "correct": "Gh1XW314zF_7_f4d9bea0",
          "incorrect": [
            "Gh1XW314zF_7_image_table2",
            "Gh1XW314zF_8_image_figure4",
            "Gh1XW314zF_4_interline-equation_equation45"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"F1 score\",\"claim\":{\"source\":\"text\",\"statement\":\"MG-LLM highest\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"Rezk et al. 2024 higher\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"text\",\"statement\":\"MG-LLM highest\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"HAIM higher\"}}",
            "{\"letter\":\"D\",\"attribute\":\"F1 score\",\"claim\":{\"source\":\"text\",\"statement\":\"64.6\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"different score\"}}",
            "{\"letter\":\"C\",\"attribute\":\"F1 score\",\"claim\":{\"source\":\"text\",\"statement\":\"MG-LLM highest\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"mmFormer higher\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"F1 score\",\"target\":\"text\",\"other_involved\":\"Table 1, Rezk et al., 2024\",\"action\":\"modify\",\"edit_statement\":\"align F1 score\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"accuracy\",\"target\":\"text\",\"other_involved\":\"Table 1, HAIM\",\"action\":\"modify\",\"edit_statement\":\"align accuracy\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"F1 score\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align F1 score\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"F1 score\",\"target\":\"text\",\"other_involved\":\"Table 1, mmFormer\",\"action\":\"modify\",\"edit_statement\":\"align F1 score\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "GDDqq0w6rs": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "GDDqq0w6rs_8_796f4fec",
          "bbox": {
            "x": 0.1659011032050574,
            "y": 0.09954467106386614,
            "width": 0.6696113074204946,
            "height": 0.18579234972677597
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "ScGPT-H was the top performer in two different families of tasks. ",
          "line": 377
        }
      ],
      "review_text": "Line 392: 'ScGPT-H was the top performer in two different families of tasks.' contradicts Figure 2 where it does not appear to be the top performer in any task.",
      "category": "table-text",
      "description": "The text states ScGPT-H to be a top performer in two families of tasks, but Figure 2 contradicts this.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "ScGPT-H was the top performer in two different families of tasks. ",
          "correct": "GDDqq0w6rs_8_796f4fec",
          "incorrect": [
            "GDDqq0w6rs_14_image_figures1",
            "GDDqq0w6rs_1_image_figure1",
            "GDDqq0w6rs_21_image_figures2"
          ],
          "letters": ["D", "C", "B", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"top performer\",\"claim\":{\"source\":\"text\",\"statement\":\"top performer in two task families\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"not highest score\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"superior performance\",\"claim\":{\"source\":\"text\",\"statement\":\"general superior performance\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"top performer in one task family\"}}",
            "{\"letter\":\"C\",\"attribute\":\"performance\",\"claim\":{\"source\":\"Figure 2\",\"statement\":\"higher than all models\"},\"evidence\":{\"source\":\"text\",\"statement\":\"does not state this\"}}",
            "{\"letter\":\"B\",\"attribute\":\"task families\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be specified\"},\"evidence\":{\"source\":\"text\",\"statement\":\"omits task families\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"performance\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"update ScGPT-H performance\",\"reason\":\"shows ScGPT-H not with highest performance in any task\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"performance\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"modify\",\"edit_statement\":\"update ScGPT-H performance\",\"reason\":\"shows ScGPT-H has only top in one task family\"}",
            "{\"letter\":\"C\",\"attribute\":\"performance\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update ScGPT-H performance\",\"reason\":\"shows ScGPT-H has highest performance\"}",
            "{\"letter\":\"B\",\"attribute\":\"task families\",\"target\":\"text\",\"other_involved\":\"figure_2\",\"action\":\"add\",\"edit_statement\":\"mention best performing task families\",\"reason\":\"omitted\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "G2BiEoB77Z": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 1,
          "image_id": "G2BiEoB77Z_1_bd3edbaf",
          "bbox": {
            "x": 0.4909894424276722,
            "y": 0.6271402327740779,
            "width": 0.3445229681978798,
            "height": 0.15710382513661203
          }
        }
      ],
      "review_text": "Figures 1(a) and 1(b) seem to be inconsistent.",
      "category": "figure-only",
      "description": "The ranking of the bar plot in (b) does not match the table in (a)",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"FEA\",\"claim\":{\"source\":\"plot_b\",\"statement\":\"highest\"},\"evidence\":{\"source\":\"table_a\",\"statement\":\"not highest\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"ANG\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be in plot\"},\"evidence\":{\"source\":\"plot_b\",\"statement\":\"missing\"}}",
            "{\"letter\":\"D\",\"attribute\":\"SAD_SUR order\",\"claim\":{\"source\":\"table_a\",\"statement\":\"SAD before SUR\"},\"evidence\":{\"source\":\"plot_b\",\"statement\":\"SUR before SAD\"}}",
            "{\"letter\":\"A\",\"attribute\":\"sum\",\"claim\":{\"source\":\"table_a\",\"statement\":\"sum to 1\"},\"evidence\":{\"source\":\"plot_b\",\"statement\":\"not sum to 1\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"relative height\",\"target\":\"plot_1b\",\"other_involved\":\"table_1a\",\"action\":\"modify\",\"edit_statement\":\"align bar height\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"emotion 'ANG'\",\"target\":\"plot_1b\",\"other_involved\":\"table_1a\",\"action\":\"add\",\"edit_statement\":\"add bar\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"order of 'SAD' and 'SUR'\",\"target\":\"plot_1b\",\"other_involved\":\"table_1a\",\"action\":\"modify\",\"edit_statement\":\"invert order\",\"reason\":\"inverted\"}",
            "{\"letter\":\"A\",\"attribute\":\"description degrees\",\"target\":\"table_1a\",\"other_involved\":\"plot_1b\",\"action\":\"modify\",\"edit_statement\":\"align sum\",\"reason\":\"sum different\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "FNDudoox4A": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "FNDudoox4A_5_4d930463",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.10132061067174694,
            "width": 0.6731448763250883,
            "height": 0.31830601092896177
          }
        }
      ],
      "review_text": "Figure 2: The target image is processed by SDI III (designated for text) during inference, contradicting the caption.",
      "category": "figure-only",
      "description": "According to the text, SDI III is responsible for text input, but the Figure shows an image as input data to SDI III",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"input type\",\"claim\":{\"source\":\"caption\",\"statement\":\"textual inputs\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"receives image\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"input type\",\"claim\":{\"source\":\"caption\",\"statement\":\"multimodal inputs\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"separate inputs\"}}",
            "{\"letter\":\"D\",\"attribute\":\"input type\",\"claim\":{\"source\":\"caption\",\"statement\":\"visual inputs\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"processes tokens and embeddings\"}}",
            "{\"letter\":\"C\",\"attribute\":\"input type\",\"claim\":{\"source\":\"text\",\"statement\":\"textual inputs\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"multimodal output\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"inputs\",\"target\":\"figure_2_caption\",\"other_involved\":\"figure_2_inference_phase_diagram\",\"action\":\"modify\",\"edit_statement\":\"align input type\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"inputs\",\"target\":\"figure_2_caption\",\"other_involved\":\"figure_2_training_phase_diagram\",\"action\":\"modify\",\"edit_statement\":\"align input fusion\",\"reason\":\"unclear\"}",
            "{\"letter\":\"D\",\"attribute\":\"inputs\",\"target\":\"figure_2_caption\",\"other_involved\":\"figure_2_training_phase_diagram\",\"action\":\"modify\",\"edit_statement\":\"align input type\",\"reason\":\"unclear\"}",
            "{\"letter\":\"C\",\"attribute\":\"inputs\",\"target\":\"text\",\"other_involved\":\"figure_2_inference_phase_diagram\",\"action\":\"modify\",\"edit_statement\":\"align input modality\",\"reason\":\"unclear\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "F4bHMojXVW": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "F4bHMojXVW_8_f684f3ab",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.10118394862107241,
            "width": 0.33922261484098937,
            "height": 0.3224043715846995
          }
        },
        {
          "type": "text",
          "page": 9,
          "content": "In Fig. 3, we compare VIDEOTREE with existing methods under different cap-\ntion settings. Under similar frame caption settings (7, 9, 11), VIDEOTREE outperforms LLoVi (Zhang\net al., 2023a) and VideoAgent (Wang et al., 2024c) by 6.5% and 2.0% on average accuracy across\nall three settings. Moreover, unlike the non-hierarchical VideoAgent baseline, which suffers from\nperformance degradation after 11 frames, our method continues improving, generalizing to 62.4\nframes and achieving 6% better accuracy in terms of best performance. This result highlight the\nimportance of VIDEOTREE\u2019s hierarchical nature.",
          "line": 446
        }
      ],
      "review_text": "Line 450 and Figure 3: The authors mention performance degradation after 11 frames in the text, but the x-axis in Figure 3 represents the number of captions, not frames.",
      "category": "figure-text",
      "description": "The text mentions performance degradation after 11 frames, but the x axis in Figure 3 shows number of captions, not number of frames",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "In Fig. 3, we compare VIDEOTREE with existing methods under different cap-\ntion settings. Under similar frame caption settings (7, 9, 11), VIDEOTREE outperforms LLoVi (Zhang\net al., 2023a) and VideoAgent (Wang et al., 2024c) by 6.5% and 2.0% on average accuracy across\nall three settings. Moreover, unlike the non-hierarchical VideoAgent baseline, which suffers from\nperformance degradation after 11 frames, our method continues improving, generalizing to 62.4\nframes and achieving 6% better accuracy in terms of best performance. This result highlight the\nimportance of VIDEOTREE\u2019s hierarchical nature.",
          "correct": "F4bHMojXVW_8_f684f3ab",
          "incorrect": [
            "F4bHMojXVW_1_image_figure1",
            "F4bHMojXVW_22_image_figure5",
            "F4bHMojXVW_22_image_figure6"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"x-axis label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"number of frames\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"number of captions\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"x-axis scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"linear scale\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"logarithmic scale\"}}",
            "{\"letter\":\"B\",\"attribute\":\"degradation\",\"claim\":{\"source\":\"text\",\"statement\":\"degradation after 11 frames\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"steady improvement\"}}",
            "{\"letter\":\"A\",\"attribute\":\"improvement\",\"claim\":{\"source\":\"text\",\"statement\":\"continues improving after 11 frames\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"plateau after 9 captions\"}}"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"x-axis label\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align label\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"x-axis scale\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"change scale\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"B\",\"attribute\":\"performance degradation\",\"target\":\"text\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"align performance\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"improvement\",\"target\":\"text\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"align performance\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["C", "D", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "F1cN3aoAty": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "F1cN3aoAty_9_5aeb04ee",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.0842441079395065,
            "width": 0.6643109540636042,
            "height": 0.4508196721311476
          }
        }
      ],
      "review_text": "Figures 4 and 7 label the proposed framework as 'VideoLimo', which contradicts the text where it is named 'VideoLight'.",
      "category": "figure-caption",
      "description": "The caption states the proposed method is called VideoLights, but the Figure itself shows VideoLimo as the name",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"name\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent naming\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"inconsistent naming\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"performance\",\"claim\":{\"source\":\"caption\",\"statement\":\"below ground truth\"},\"evidence\":{\"source\":\"graph\",\"statement\":\"exceed ground truth\"}}",
            "{\"letter\":\"B\",\"attribute\":\"results\",\"claim\":{\"source\":\"expectation\",\"statement\":\"match query\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"don't match query\"}}",
            "{\"letter\":\"C\",\"attribute\":\"method\",\"claim\":{\"source\":\"caption\",\"statement\":\"two methods\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"one method\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"method name\",\"target\":\"caption_figure_4\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"align with prediction\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"performance\",\"target\":\"caption_figure_4\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"align with graph\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"B\",\"attribute\":\"results\",\"target\":\"figure_4a\",\"other_involved\":\"figure_4b\",\"action\":\"modify\",\"edit_statement\":\"align with queries\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"C\",\"attribute\":\"method names\",\"target\":\"caption_figure_4\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"align with figure\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "EispKqtw5B": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "EispKqtw5B_6_1ae60aee",
          "bbox": {
            "x": 0.5863958028517005,
            "y": 0.45705826556096313,
            "width": 0.2438162544169611,
            "height": 0.1284153005464481
          }
        },
        {
          "type": "image",
          "page": 7,
          "image_id": "EispKqtw5B_7_585f262a",
          "bbox": {
            "x": 0.17296824101424468,
            "y": 0.10022773117315574,
            "width": 0.6554770318021201,
            "height": 0.8005464480874318
          }
        }
      ],
      "review_text": "Table 1: The number of parameters of ShuffleMamba-S is shown as 7M, while Table 2 shows it as 26M.",
      "category": "table-table",
      "description": "Table 1 shows the parameter count for different sizes of the model ShuffleMamba. The small version in Table 1 shows 7M parameters, while in Table 2 it is 26M parameter",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "EispKqtw5B_6_1ae60aee",
          "correct": "EispKqtw5B_7_585f262a",
          "incorrect": [
            "EispKqtw5B_7_table_table3",
            "EispKqtw5B_9_table_table5",
            "EispKqtw5B_9_table_table6"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"parameter count\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"different\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"#Depth\",\"claim\":{\"source\":\"Table 2\",\"statement\":\"no value\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"24\"}}",
            "{\"letter\":\"B\",\"attribute\":\"Base model parameter count\",\"claim\":{\"source\":\"Table 1\",\"statement\":\"98M\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"99M\"}}",
            "{\"letter\":\"C\",\"attribute\":\"#GFlops\",\"claim\":{\"source\":\"Table 2\",\"statement\":\"different value\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"4.3\"}}"
          ],
          "letters": ["D", "A", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"parameter count\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"parameter count for ShuffleMamba-S\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"#Depth\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"add\",\"edit_statement\":\"#Depth for ShuffleMamba-S\",\"reason\":\"missing information\"}",
            "{\"letter\":\"B\",\"attribute\":\"parameters\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"parameter count\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"#GFlops\",\"target\":\"table_2\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"#GFlops for ShuffleMamba-S\",\"reason\":\"different\"}"
          ],
          "letters": ["D", "A", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1", "Table 2"]
    }
  ],
  "EXaKfdsw04": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "EXaKfdsw04_3_71528e2b",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.5673724732112364,
            "width": 0.6625441696113074,
            "height": 0.319672131147541
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "To address the numerous issues faced by the FULL-PROOF strategy, we innovatively propose STEP-\nPROOF. STEP-PROOF employs a step-by-step generation and verification strategy, offering better\nperformance and stability compared to FULL-PROOF. The workflow of STEP-PROOF is illustrated\nin the left of Figure 1.",
          "line": 185
        }
      ],
      "review_text": "Figure 1: The workflow of STEP-PROOF is illustrated in the left, but it should be in the right.",
      "category": "figure-text",
      "description": "The text states the step-proof strategy is on the left of Figure 1, but it is actually on the right of Figure 1",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "To address the numerous issues faced by the FULL-PROOF strategy, we innovatively propose STEP-\nPROOF. STEP-PROOF employs a step-by-step generation and verification strategy, offering better\nperformance and stability compared to FULL-PROOF. The workflow of STEP-PROOF is illustrated\nin the left of Figure 1.",
          "correct": "EXaKfdsw04_3_71528e2b",
          "incorrect": [
            "EXaKfdsw04_4_image_figure2",
            "EXaKfdsw04_7_image_figure4",
            "EXaKfdsw04_7_image_figure3"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"placement\",\"claim\":{\"source\":\"text\",\"statement\":\"left of Figure 1\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"right side\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"comparative data\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should have comparative data\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"lacks comparative data\"}}",
            "{\"letter\":\"D\",\"attribute\":\"processing\",\"claim\":{\"source\":\"expectation\",\"statement\":\"step-by-step generation\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"merging all formal steps\"}}",
            "{\"letter\":\"C\",\"attribute\":\"results\",\"claim\":{\"source\":\"text\",\"statement\":\"detailed feedback\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"succeed or failed\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"workflow illustration\",\"target\":\"text\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"workflow location\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"performance claims\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"comparative data\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"Step-Proof Strategy processing\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"step-by-step generation\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"Full-Proof Strategy results\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"feedback complexity\",\"reason\":\"contradiction\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "ERBm5WK8nq": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "ERBm5WK8nq_9_fba91ad8",
          "bbox": {
            "x": 0.49452301133226584,
            "y": 0.5061020147605021,
            "width": 0.33922261484098937,
            "height": 0.22404371584699456
          }
        }
      ],
      "review_text": "Minor: Table 5, in caption the inference and training speed is in (s) but in the table it is marked as (ms).",
      "category": "figure-caption",
      "description": "In the caption, the inference speed is denoted in seconds, but in the Table in milliseconds",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"unit\",\"claim\":{\"source\":\"caption\",\"statement\":\"seconds\"},\"evidence\":{\"source\":\"table\",\"statement\":\"milliseconds\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"unit\",\"claim\":{\"source\":\"caption\",\"statement\":\"milliseconds\"},\"evidence\":{\"source\":\"table\",\"statement\":\"seconds\"}}",
            "{\"letter\":\"D\",\"attribute\":\"unit\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be defined\"},\"evidence\":{\"source\":\"table\",\"statement\":\"not defined\"}}",
            "{\"letter\":\"B\",\"attribute\":\"value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be larger\"},\"evidence\":{\"source\":\"table\",\"statement\":\"too small\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"inference speed units\",\"target\":\"table_5_caption\",\"other_involved\":\"table_5_column_inference\",\"action\":\"modify\",\"edit_statement\":\"update unit\",\"reason\":\"contradicts\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"training speed units\",\"target\":\"table_5_column_training\",\"other_involved\":\"table_5_caption\",\"action\":\"modify\",\"edit_statement\":\"update unit\",\"reason\":\"contradicts\"}",
            "{\"letter\":\"D\",\"attribute\":\"parameters units\",\"target\":\"table_5_column_parameters\",\"other_involved\":\"table_5_caption\",\"action\":\"add\",\"edit_statement\":\"define M\",\"reason\":\"unclear\"}",
            "{\"letter\":\"B\",\"attribute\":\"inference speed values\",\"target\":\"table_5_column_inference\",\"other_involved\":\"DLinear, MoLE\",\"action\":\"modify\",\"edit_statement\":\"verify values\",\"reason\":\"implausible\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 5"]
    }
  ],
  "EIXZXPz7jU": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 16,
          "image_id": "EIXZXPz7jU_16_044ea11f",
          "bbox": {
            "x": 0.16236753430046377,
            "y": 0.5714025445323173,
            "width": 0.6784452296819787,
            "height": 0.23224043715846995
          }
        }
      ],
      "review_text": "Figure 16: Color inconsistency in the plots is mentioned, indicating a visual inconsistency within the paper.",
      "category": "figure-only",
      "description": "The line colors for DAS PINN and FMS PINN is swapped between (a) and (b)",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_16\",\"statement\":\"inconsistent between subplots\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"performance comparison\",\"claim\":{\"source\":\"plot\",\"statement\":\"DAS PINN performs better\"},\"evidence\":{\"source\":\"expectation\",\"statement\":\"inconsistent performance\"}}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis range\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be the same\"},\"evidence\":{\"source\":\"figure_16\",\"statement\":\"different between subplots\"}}",
            "{\"letter\":\"D\",\"attribute\":\"x-axis label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be present\"},\"evidence\":{\"source\":\"figure_16\",\"statement\":\"missing in subplot b\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"lines\",\"target\":\"figure_16a\",\"other_involved\":\"figure_16b\",\"action\":\"modify\",\"edit_statement\":\"represent networks\",\"reason\":\"swapped\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"DAS PINN performance\",\"target\":\"figure_16b\",\"other_involved\":\"figure_16a\",\"action\":\"modify\",\"edit_statement\":\"align performance\",\"reason\":\"not aligned\"}",
            "{\"letter\":\"B\",\"attribute\":\"MSE range\",\"target\":\"figure_16a\",\"other_involved\":\"figure_16b\",\"action\":\"modify\",\"edit_statement\":\"align range\",\"reason\":\"not aligned\"}",
            "{\"letter\":\"D\",\"attribute\":\"x-axis label\",\"target\":\"figure_16b\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"add label\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 16"]
    }
  ],
  "E0UsEIRBQ8": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "E0UsEIRBQ8_7_b2ecee59",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.09640252785604508,
            "width": 0.6643109540636042,
            "height": 0.27732240437158473
          }
        }
      ],
      "review_text": "Table 1: The mAP with only color transform is not shown, which contradicts the mention of an ablation study.",
      "category": "table-only",
      "description": "The red and blue highlighted numbers are not the two best results in most cases",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"Scallop column\",\"claim\":{\"source\":\"expectation\",\"statement\":\"highlight best\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"higher value not highlighted\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"mAP column\",\"claim\":{\"source\":\"caption\",\"statement\":\"red is second best\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"oracle is not second best\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Holothurian column\",\"claim\":{\"source\":\"caption\",\"statement\":\"red is best\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"red is not best\"}}",
            "{\"letter\":\"A\",\"attribute\":\"mAP50 column\",\"claim\":{\"source\":\"expectation\",\"statement\":\"highlight second best\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"does not highlight second best\"}}"
          ],
          "letters": ["B", "C", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"highlighting\",\"target\":\"table_1\",\"other_involved\":\"Scallop column, Consistent-Teacher, Unbiased-Teacherv2\",\"action\":\"modify\",\"edit_statement\":\"highlight values\",\"reason\":\"incorrect\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"highlighting\",\"target\":\"table_1\",\"other_involved\":\"mAP column, oracle method\",\"action\":\"modify\",\"edit_statement\":\"highlight colors\",\"reason\":\"reversed\"}",
            "{\"letter\":\"D\",\"attribute\":\"highlighting\",\"target\":\"table_1\",\"other_involved\":\"Holothurian column\",\"action\":\"modify\",\"edit_statement\":\"highlight order\",\"reason\":\"reversed\"}",
            "{\"letter\":\"A\",\"attribute\":\"highlighting\",\"target\":\"table_1\",\"other_involved\":\"mAP50 scores\",\"action\":\"add\",\"edit_statement\":\"second-best highlight\",\"reason\":\"missing\"}"
          ],
          "letters": ["B", "C", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "Do3whenqeY": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "Do3whenqeY_2_76b84041",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.07994079589843751,
            "width": 0.6696113074204946,
            "height": 0.3265027322404372
          }
        }
      ],
      "review_text": "Figure 1: The correspondence between 1-10 and 'satisfied' and 'dissatisfied' on the left side seems to be reversed after data conversion.",
      "category": "figure-only",
      "description": "In the original question section, 1 means completely dissatisfied and 10 completely satisfied, but in the converted statement section, 10 means dissatisfied and 1 satisfied",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"labels\",\"claim\":{\"source\":\"Original Question\",\"statement\":\"1 is Completely Dissatisfied\"},\"evidence\":{\"source\":\"Converted Statements\",\"statement\":\"1 is very satisfied\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent scale\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"different scale\"}}",
            "{\"letter\":\"D\",\"attribute\":\"data type\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent data type\"},\"evidence\":{\"source\":\"Converted Statements\",\"statement\":\"different than Original Questions\"}}",
            "{\"letter\":\"B\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent accuracy\"},\"evidence\":{\"source\":\"LMs' Predictions\",\"statement\":\"different accuracy\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"definitions\",\"target\":\"original_question\",\"other_involved\":\"converted_statements\",\"action\":\"modify\",\"edit_statement\":\"align scale meanings\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"scale\",\"target\":\"original_question\",\"other_involved\":\"converted_statements\",\"action\":\"modify\",\"edit_statement\":\"use same scale\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"questions\",\"target\":\"original_question\",\"other_involved\":\"converted_statements\",\"action\":\"modify\",\"edit_statement\":\"use same type\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"percentages\",\"target\":\"lm_predictions\",\"other_involved\":\"lm_predictions\",\"action\":\"modify\",\"edit_statement\":\"align percentages\",\"reason\":\"different\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "DXaUC7lBq1": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "DXaUC7lBq1_9_d2394628",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.09845170818391395,
            "width": 0.6590106007067138,
            "height": 0.273224043715847
          }
        },
        {
          "type": "image",
          "page": 10,
          "image_id": "DXaUC7lBq1_10_0a49712b",
          "bbox": {
            "x": 0.17650180991883832,
            "y": 0.2740665289873634,
            "width": 0.6448763250883391,
            "height": 0.2336065573770492
          }
        }
      ],
      "review_text": "Table 5: Narcissism is rated as 4.3 for Gemma-2B-Instruct base, but is rated as 4.3 for Gemma2-9B-Instruct base in Tables 1 to 4.",
      "category": "table-table",
      "description": "The base values for the subscales are swapped between Gemma-2-9B-Instruct and Gemma-2B-Instruct in Table 4 and Table 5",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "DXaUC7lBq1_9_d2394628",
          "correct": "DXaUC7lBq1_10_0a49712b",
          "incorrect": [
            "DXaUC7lBq1_8_table_table4",
            "DXaUC7lBq1_7_table_table3",
            "DXaUC7lBq1_7_table_table2"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"Base values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not be swapped\"},\"evidence\":{\"source\":\"Table 4 and Table 5\",\"statement\":\"swapped\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"Base values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 4 and Table 5\",\"statement\":\"inconsistent for one model\"}}",
            "{\"letter\":\"A\",\"attribute\":\"Base values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 4 and Table 5\",\"statement\":\"different\"}}",
            "{\"letter\":\"B\",\"attribute\":\"Base column\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be present\"},\"evidence\":{\"source\":\"Table 5\",\"statement\":\"missing\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"Base values\",\"target\":\"table_4\",\"other_involved\":\"table_5\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"identical\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"Base values\",\"target\":\"table_4\",\"other_involved\":\"table_5\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"Base values\",\"target\":\"table_4\",\"other_involved\":\"table_5\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"Base column\",\"target\":\"table_5\",\"other_involved\":\"table_4\",\"action\":\"add\",\"edit_statement\":\"add missing\",\"reason\":\"incomplete\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 4", "Table 5"]
    }
  ],
  "DWISGL63PC": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 10,
          "image_id": "DWISGL63PC_10_12861547",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.1539162390870475,
            "width": 0.666077738515901,
            "height": 0.2363387978142077
          }
        }
      ],
      "review_text": "W.1.2 Correctness of Results on OpenScene Dataset: The authors report that PDM-closed has 0h training time but it is reported with 62h training time in Figure 6 b).",
      "category": "figure-only",
      "description": "There is a training time of 62h for PDM-closed, but PDM-closed is rule based and does not have a training phase",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"training time\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not require training\"},\"evidence\":{\"source\":\"figure_6(b)\",\"statement\":\"shows training time\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"FPS\",\"claim\":{\"source\":\"expectation\",\"statement\":\"proportional\"},\"evidence\":{\"source\":\"figure_6(b)\",\"statement\":\"disproportionately low\"}}",
            "{\"letter\":\"B\",\"attribute\":\"grouping\",\"claim\":{\"source\":\"expectation\",\"statement\":\"grouped with 6(a)\"},\"evidence\":{\"source\":\"figure_6(b)\",\"statement\":\"grouped with 6(b) methods\"}}",
            "{\"letter\":\"D\",\"attribute\":\"training time\",\"claim\":{\"source\":\"expectation\",\"statement\":\"lower\"},\"evidence\":{\"source\":\"figure_6(b)\",\"statement\":\"much higher than expected\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"training time\",\"target\":\"figure_6b\",\"other_involved\":\"pdm-closed\",\"action\":\"remove\",\"edit_statement\":\"62-hour training time\",\"reason\":\"rule-based\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"fps value\",\"target\":\"figure_6b\",\"other_involved\":\"pdm-closed, training time\",\"action\":\"modify\",\"edit_statement\":\"fps value\",\"reason\":\"disproportionate\"}",
            "{\"letter\":\"B\",\"attribute\":\"grouping\",\"target\":\"figure_6b\",\"other_involved\":\"pdm-closed, transfuser, he-drive, figure_6a\",\"action\":\"reposition\",\"edit_statement\":\"group pdm-closed\",\"reason\":\"misgrouped\"}",
            "{\"letter\":\"D\",\"attribute\":\"training time\",\"target\":\"figure_6b\",\"other_involved\":\"transfuser, he-drive, fps values\",\"action\":\"modify\",\"edit_statement\":\"training time values\",\"reason\":\"unexpected\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "DLhjxxXYwH": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "DLhjxxXYwH_5_98c2f492",
          "bbox": {
            "x": 0.2913427993181316,
            "y": 0.21853370874957312,
            "width": 0.5459363957597173,
            "height": 0.05191256830601093
          }
        },
        {
          "type": "text",
          "page": 5,
          "content": "Emergence in this context is inherently multiscale. It involves interactions across different scales\nof the network, where G represents one scale and H represents a higher scale. Emergence appears\nonly when viewed from this multiscale perspective, as it captures the complexity arising from the\nnetwork\u2019s hierarchical structure. In our graph-theoretical framework, the emergence value E of a\nneural network is defined based on the number of paths from nodes at scale G to nodes at scale H.\nThis definition captures the essence of multiscale interactions within the network. The more paths\nthat exist between these scales, the greater the degree of emergence.\n\u2022 G is the set of nodes at the lower scale,\n\u2022 H is the set of nodes at the higher scale.",
          "line": 230
        }
      ],
      "review_text": "2. The contents after eqn (3), say, L231-241, are inconsistent with this eqn. In this eqn, H belongs to G, but in L231-241, G and H denote two different sets of nodes. So, I cannot understand these contents.",
      "category": "equation-text",
      "description": "In the equation (3), H is part of G, but the text describes it as two different set of nodes",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the equation consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the equation inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Emergence in this context is inherently multiscale. It involves interactions across different scales\nof the network, where G represents one scale and H represents a higher scale. Emergence appears\nonly when viewed from this multiscale perspective, as it captures the complexity arising from the\nnetwork\u2019s hierarchical structure. In our graph-theoretical framework, the emergence value E of a\nneural network is defined based on the number of paths from nodes at scale G to nodes at scale H.\nThis definition captures the essence of multiscale interactions within the network. The more paths\nthat exist between these scales, the greater the degree of emergence.\n\u2022 G is the set of nodes at the lower scale,\n\u2022 H is the set of nodes at the higher scale.",
          "correct": "DLhjxxXYwH_5_98c2f492",
          "incorrect": [
            "DLhjxxXYwH_6_interline-equation_equation7.5",
            "DLhjxxXYwH_4_interline-equation_equation5",
            "DLhjxxXYwH_6_interline-equation_equation17"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"set relation\",\"claim\":{\"source\":\"text\",\"statement\":\"G and H distinct\"},\"evidence\":{\"source\":\"equation (3)\",\"statement\":\"H subset of G\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"path origin\",\"claim\":{\"source\":\"text\",\"statement\":\"nodes at scale G to H\"},\"evidence\":{\"source\":\"equation\",\"statement\":\"N_H(x) to H\"}}",
            "{\"letter\":\"B\",\"attribute\":\"scale description\",\"claim\":{\"source\":\"text\",\"statement\":\"describes G and H\"},\"evidence\":{\"source\":\"equation (3)\",\"statement\":\"G and H not used in equation\"}}",
            "{\"letter\":\"D\",\"attribute\":\"emergence property\",\"claim\":{\"source\":\"text\",\"statement\":\"hierarchical structure\"},\"evidence\":{\"source\":\"equation (3)\",\"statement\":\"sum over individual nodes\"}}"
          ],
          "letters": ["C", "A", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"set relationship\",\"target\":\"equation_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align node relationship\",\"reason\":\"contradictory\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"origin of paths\",\"target\":\"equation_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align path origin\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"scale terms\",\"target\":\"equation_3\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"include scale terms\",\"reason\":\"not present\"}",
            "{\"letter\":\"D\",\"attribute\":\"emergence calculation\",\"target\":\"equation_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align emergence definition\",\"reason\":\"different\"}"
          ],
          "letters": ["C", "A", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["(3)"]
    }
  ],
  "CscKx97jBi": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "CscKx97jBi_6_1cc47eb2",
          "bbox": {
            "x": 0.17296824101424468,
            "y": 0.4874544404243511,
            "width": 0.6519434628975265,
            "height": 0.4234972677595629
          }
        },
        {
          "type": "text",
          "page": 6,
          "content": "HumanEval dataset: The HumanEval dataset provides 164 comment descriptions of functions\npaired with a canonical implementation of each function and several input\u2013output pairs that the\nfunction should pass. We follow the same evaluation method as the MBPP dataset.\nEvaluation Metrics We use Pass@k as our evaluation metrics which is the same as previous works\n(Zhou et al. (2023)Wang et al. (2023)Shinn et al. (2023))",
          "line": 282
        }
      ],
      "review_text": "Table 1: The Pass@1 accuracy of the proposed method with GPT-4 is stated to be 97.2%, which is 0.9% higher than AgentCoder's 96.3%. However, the reviewer calculated that 159/164 equals 96.95%, and 160/164 equals 97.6%, neither of which rounds to 97.2%.",
      "category": "table-text",
      "description": "The table shows 97.2% for the proposed method using GPT-4 is not possible, as 159/164 equals 96.95%, and 160/164 equals 97.6%, both do not round to 97.2%",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "HumanEval dataset: The HumanEval dataset provides 164 comment descriptions of functions\npaired with a canonical implementation of each function and several input\u2013output pairs that the\nfunction should pass. We follow the same evaluation method as the MBPP dataset.\nEvaluation Metrics We use Pass@k as our evaluation metrics which is the same as previous works\n(Zhou et al. (2023)Wang et al. (2023)Shinn et al. (2023))",
          "correct": "CscKx97jBi_6_1cc47eb2",
          "incorrect": [
            "CscKx97jBi_6_table_table2",
            "CscKx97jBi_6_image_figure3",
            "CscKx97jBi_7_image_figure4"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"score\",\"claim\":{\"source\":\"expectation\",\"statement\":\"integer correct\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"97.2%\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"highlighting\",\"claim\":{\"source\":\"expectation\",\"statement\":\"top performer\"},\"evidence\":{\"source\":\"explanation\",\"statement\":\"not top performer\"}}",
            "{\"letter\":\"A\",\"attribute\":\"number of problems\",\"claim\":{\"source\":\"caption\",\"statement\":\"different number\"},\"evidence\":{\"source\":\"description\",\"statement\":\"164 problems\"}}",
            "{\"letter\":\"C\",\"attribute\":\"score difference\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent metrics\"},\"evidence\":{\"source\":\"scores\",\"statement\":\"different scores\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"HumanEval score percentage\",\"target\":\"Table_1\",\"other_involved\":\"164 problems\",\"action\":\"modify\",\"edit_statement\":\"correct value\",\"reason\":\"impossible\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"bold score\",\"target\":\"Table_1\",\"other_involved\":\"explanation in text\",\"action\":\"remove\",\"edit_statement\":\"bolding\",\"reason\":\"misrepresentation\"}",
            "{\"letter\":\"A\",\"attribute\":\"number of problems\",\"target\":\"Table_1 caption\",\"other_involved\":\"HumanEval description\",\"action\":\"modify\",\"edit_statement\":\"align count\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"HumanEval score and MBPP score\",\"target\":\"Table_1\",\"other_involved\":\"Ours with GPT-4\",\"action\":\"modify\",\"edit_statement\":\"clarify evaluation\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "CfXRcN4iUw": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "CfXRcN4iUw_8_da9a98cb",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.10268673089032618,
            "width": 0.6625441696113074,
            "height": 0.18989071038251368
          }
        },
        {
          "type": "image",
          "page": 8,
          "image_id": "CfXRcN4iUw_8_db0680b4",
          "bbox": {
            "x": 0.17296824101424468,
            "y": 0.3261839652973446,
            "width": 0.6590106007067138,
            "height": 0.31010928961748635
          }
        }
      ],
      "review_text": "Figure 2(c): The best accuracy of the IGNN-Solver is around 0.716, while the accuracy given in Table 1 is 0.725.",
      "category": "figure-table",
      "description": "The best accuracy of IGNN-Solver for ogbn-arxiv in the Table 1 does not match the accuracy shown in Figure 2(c)",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "CfXRcN4iUw_8_da9a98cb",
          "correct": "CfXRcN4iUw_8_db0680b4",
          "incorrect": [
            "CfXRcN4iUw_15_table_table2",
            "CfXRcN4iUw_16_table_table3",
            "CfXRcN4iUw_8_image_figure5"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"peak accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Figure 2(c) and Table 1\",\"statement\":\"inconsistent values\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"Figure 2(c)\",\"statement\":\"0.720\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"consistent with IGNN-Solver\"}}",
            "{\"letter\":\"C\",\"attribute\":\"numerical accuracy\",\"claim\":{\"source\":\"Table 1\",\"statement\":\"reported\"},\"evidence\":{\"source\":\"Figure 2(c)\",\"statement\":\"not extractable\"}}",
            "{\"letter\":\"D\",\"attribute\":\"performance\",\"claim\":{\"source\":\"Table 1\",\"statement\":\"superior for IGNN-Solver\"},\"evidence\":{\"source\":\"Figure 2(c)\",\"statement\":\"slowest to peak accuracy\"}}"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"peak accuracy\",\"target\":\"figure_2c\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"update accuracy\",\"reason\":\"lower\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"peak accuracy\",\"target\":\"figure_2c\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"update accuracy\",\"reason\":\"higher\"}",
            "{\"letter\":\"C\",\"attribute\":\"accuracy\",\"target\":\"figure_2c\",\"other_involved\":\"table_1\",\"action\":\"add\",\"edit_statement\":\"numerical value\",\"reason\":\"not shown\"}",
            "{\"letter\":\"D\",\"attribute\":\"speed performance of models\",\"target\":\"figure_2c\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align implications\",\"reason\":\"discrepancy\"}"
          ],
          "letters": ["A", "B", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2", "Table 1"]
    }
  ],
  "CKx7eOYFG8": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "CKx7eOYFG8_3_165f72f1",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.5070583155897798,
            "width": 0.666077738515901,
            "height": 0.3415300546448088
          }
        },
        {
          "type": "text",
          "page": 6,
          "content": "In contrast, our method optimizes all three models jointly on the current task data",
          "line": 320
        }
      ],
      "review_text": "Line 320-321: The text states that the method optimizes all three models jointly on the current task data, but Figure 1 shows that only the batch normalization layers of the teacher model are updated, which might be confusing to the reader.",
      "category": "figure-text",
      "description": "The text states all three models are optimized jointly, but the training model in the Figure 1 shows frozen weights, meaning it does not get optimized.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "In contrast, our method optimizes all three models jointly on the current task data",
          "correct": "CKx7eOYFG8_3_165f72f1",
          "incorrect": [
            "CKx7eOYFG8_7_image_figure2",
            "CKx7eOYFG8_8_image_figure3",
            "CKx7eOYFG8_3_interline-equation_equation20"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"optimization\",\"claim\":{\"source\":\"expectation\",\"statement\":\"parameters should be optimized\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"parameters frozen\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"optimization\",\"claim\":{\"source\":\"expectation\",\"statement\":\"all models optimized jointly\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"Trailing Model not optimized\"}}",
            "{\"letter\":\"A\",\"attribute\":\"optimization\",\"claim\":{\"source\":\"text\",\"statement\":\"all three models optimized jointly\"},\"evidence\":{\"source\":\"diagram\",\"statement\":\"only two models optimized\"}}",
            "{\"letter\":\"C\",\"attribute\":\"distillation\",\"claim\":{\"source\":\"text\",\"statement\":\"symmetric distillation\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"asymmetric distillation\"}}"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"optimization status\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"remove snowflake\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"parameter copying\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"remove copying\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"A\",\"attribute\":\"optimization description\",\"target\":\"text\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"describe three models\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"distillation\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"show distillation\",\"reason\":\"missing info\"}"
          ],
          "letters": ["B", "D", "A", "C"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 1"]
    }
  ],
  "CI9JMBAsPg": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "CI9JMBAsPg_4_b1f1609a",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.38356101447767255,
            "width": 0.6837455830388691,
            "height": 0.18852459016393444
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "Stage 1: Data Preprocessing. Our primary focus is to improve the data quality and enhance\nthe compilation success rate of LATEX source code. Initially, we undertake an expansion of all files\nreferenced by the \\input and \\include commands, followed by a series of crucial pre-processing\nsteps. These steps encompass the integration of requisite environment packages, the exclusion of comment lines, and the removal of extraneous tokens such as \\vspace, \\ref, and other annotations that do not contribute to the semantic essence of the document",
          "line": 159
        }
      ],
      "review_text": "Table 2: The presence of \\\\ref commands in the table contradicts the statement in line 161 that these commands have been removed.",
      "category": "table-text",
      "description": "The text states that extraneous tokens such as \\ref are removed, but in Table 2, we can see \\ref included.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Stage 1: Data Preprocessing. Our primary focus is to improve the data quality and enhance\nthe compilation success rate of LATEX source code. Initially, we undertake an expansion of all files\nreferenced by the \\input and \\include commands, followed by a series of crucial pre-processing\nsteps. These steps encompass the integration of requisite environment packages, the exclusion of comment lines, and the removal of extraneous tokens such as \\vspace, \\ref, and other annotations that do not contribute to the semantic essence of the document",
          "correct": "CI9JMBAsPg_4_b1f1609a",
          "incorrect": [
            "CI9JMBAsPg_6_table_table3",
            "CI9JMBAsPg_2_table_table1",
            "CI9JMBAsPg_7_table_table4"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"tokens\",\"claim\":{\"source\":\"expectation\",\"statement\":\"remove ref tokens\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"includes ref\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"examples\",\"claim\":{\"source\":\"expectation\",\"statement\":\"show vspace removal\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"no vspace removal example\"}}",
            "{\"letter\":\"A\",\"attribute\":\"examples\",\"claim\":{\"source\":\"expectation\",\"statement\":\"simplify document\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"lists section examples\"}}",
            "{\"letter\":\"B\",\"attribute\":\"examples\",\"claim\":{\"source\":\"expectation\",\"statement\":\"show expanded content\"},\"evidence\":{\"source\":\"Table 2\",\"statement\":\"no expanded content\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"ref token\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"remove\",\"edit_statement\":\"ref token in example\",\"reason\":\"contradicts text\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"examples\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"vspace token removal examples\",\"reason\":\"none shown\"}",
            "{\"letter\":\"A\",\"attribute\":\"examples\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align examples\",\"reason\":\"contradicts goal\"}",
            "{\"letter\":\"B\",\"attribute\":\"examples\",\"target\":\"table_2\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"expanded file content examples\",\"reason\":\"none shown\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "CH7Ba4RFa2": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "CH7Ba4RFa2_4_6618d824",
          "bbox": {
            "x": 0.5068905024983437,
            "y": 0.12577411255549864,
            "width": 0.3250883392226148,
            "height": 0.23497267759562843
          }
        },
        {
          "type": "text",
          "page": 4,
          "content": "Consequently, we utilize a U-Net architecture as the backbone of ULM. Since we do not employ anchors, we construct a lightweight Absolute Scale Module (ASM) based on MLP to introduce absolute scale information. The ULM consists of the U-Net and the ASM, as illustrated in Figure 3.",
          "line": 208
        }
      ],
      "review_text": "Line 210: The description of ASM is unclear. It is mentioned that ASM is shown in Figure 3, but no such reference to ASM can be found in the figure.",
      "category": "figure-text",
      "description": "The text talks about ASM and refers to Figure 3 to illustrate it, but ASM can't be found in the Figure 3.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Consequently, we utilize a U-Net architecture as the backbone of ULM. Since we do not employ anchors, we construct a lightweight Absolute Scale Module (ASM) based on MLP to introduce absolute scale information. The ULM consists of the U-Net and the ASM, as illustrated in Figure 3.",
          "correct": "CH7Ba4RFa2_4_6618d824",
          "incorrect": [
            "CH7Ba4RFa2_1_image_figure2",
            "CH7Ba4RFa2_0_image_figure1",
            "CH7Ba4RFa2_4_interline-equation_equation10.5"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"ASM\",\"claim\":{\"source\":\"text\",\"statement\":\"part of ULM\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"not labeled\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"ULM function\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent description\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"generating maps\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Pre-trained 2D Detector\",\"claim\":{\"source\":\"expectation\",\"statement\":\"related to process\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"unrelated to process\"}}",
            "{\"letter\":\"C\",\"attribute\":\"3D Lane process\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent process\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"separate postprocessing\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"Absolute Scale Module (ASM)\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add module\",\"reason\":\"omitted\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"ULM output\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"match description\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"Pre-trained 2D Detector\",\"target\":\"text\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"match figure\",\"reason\":\"unrelated\"}",
            "{\"letter\":\"C\",\"attribute\":\"3D Lane Postprocessing\",\"target\":\"figure_3\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align process\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "CGT0T9uUOY": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "CGT0T9uUOY_6_99f4affa",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.09858837023458845,
            "width": 0.6625441696113074,
            "height": 0.18579234972677597
          }
        }
      ],
      "review_text": "Figure 4: The authors claim that their method produces pseudo labels that are more view consistent, but the pillows on the sofas do not have consistent masks.",
      "category": "figure-caption",
      "description": "The caption claims more consistent view-consistent segmentation, but the pillows are not consistently segmented, showing no improvement over Panoptic Lifting.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"view-consistency\",\"claim\":{\"source\":\"caption\",\"statement\":\"improved view-consistency\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"no improvement\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"color assignment\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent color\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"inconsistent color\"}}",
            "{\"letter\":\"A\",\"attribute\":\"caption\",\"claim\":{\"source\":\"caption\",\"statement\":\"improved view-consistency\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"invalidate caption\"}}",
            "{\"letter\":\"B\",\"attribute\":\"2D predictions\",\"claim\":{\"source\":\"expectation\",\"statement\":\"detect pillows correctly\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"detects pillows as wall\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"segmentation\",\"target\":\"figure_4\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align consistency\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"color assignments\",\"target\":\"figure_4\",\"other_involved\":\"tracking\",\"action\":\"modify\",\"edit_statement\":\"harmonize across views\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"view-consistency\",\"target\":\"caption\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"update claim\",\"reason\":\"incorrect\"}",
            "{\"letter\":\"B\",\"attribute\":\"2D predictions\",\"target\":\"figure_4\",\"other_involved\":\"SAM\",\"action\":\"modify\",\"edit_statement\":\"correct detection\",\"reason\":\"incorrect\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "CB2r9PwuRQ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "CB2r9PwuRQ_9_a85cde27",
          "bbox": {
            "x": 0.4309187710495803,
            "y": 0.4552140366184256,
            "width": 0.39752650176678445,
            "height": 0.1598360655737705
          }
        },
        {
          "type": "text",
          "page": 9,
          "content": "Second, deleting the CEB causal module led to a significant drop in performance.",
          "line": 464
        }
      ],
      "review_text": "Table 1: The performance is shown to be unsatisfying, which contradicts the statement in line 465-line 468 that R-L rises after deleting the CEB causal module.",
      "category": "table-text",
      "description": "The text states that deleting the CEB causal module leads to a significant drop in performance, but Table 3 shows that R-L actually goes up.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Second, deleting the CEB causal module led to a significant drop in performance.",
          "correct": "CB2r9PwuRQ_9_a85cde27",
          "incorrect": [
            "CB2r9PwuRQ_9_table_table4",
            "CB2r9PwuRQ_8_table_table2",
            "CB2r9PwuRQ_7_table_table1"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"performance drop\",\"claim\":{\"source\":\"text\",\"statement\":\"significant drop\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"R-L increases\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"performance drop\",\"claim\":{\"source\":\"expectation\",\"statement\":\"significant drop\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"decreases in PPL and B-1\"}}",
            "{\"letter\":\"A\",\"attribute\":\"performance drop\",\"claim\":{\"source\":\"text\",\"statement\":\"performance drop\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"overall improvement\"}}",
            "{\"letter\":\"D\",\"attribute\":\"module importance\",\"claim\":{\"source\":\"text\",\"statement\":\"module importance\"},\"evidence\":{\"source\":\"Table 3\",\"statement\":\"no ablation data\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"performance drop\",\"target\":\"text\",\"other_involved\":\"table_3\",\"action\":\"modify\",\"edit_statement\":\"align with table\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"performance drop\",\"target\":\"text\",\"other_involved\":\"table_3\",\"action\":\"modify\",\"edit_statement\":\"align with table\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"A\",\"attribute\":\"performance drop\",\"target\":\"text\",\"other_involved\":\"table_3\",\"action\":\"modify\",\"edit_statement\":\"align with table\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"ablation data\",\"target\":\"table_3\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add ablation data\",\"reason\":\"missing\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3"]
    }
  ],
  "C9pndmSjg6": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "C9pndmSjg6_8_850f5fdf",
          "bbox": {
            "x": 0.17296824101424468,
            "y": 0.19469489425909325,
            "width": 0.6607773851590105,
            "height": 0.26639344262295084
          }
        },
        {
          "type": "text",
          "page": 8,
          "content": "Then Table 1 reflects the percentage error from point to the EF line in the figures. In the table, Linear, Dual, and Diag models as relaxation models cannot always have a good effect on solution generation. But our approach based on these exact solutions, and with heuristics, gains a good effect. At the same time, we introduce more heuristics like Genetic Algorithm (GA), Tabu search(SA), and Simulated Annealing (SA) to compare and verify our approach. Compared with CPLEX optimal solutions and others heuristics (Woodside-Oriakhi et al., 2011), our approach gains a close but mostly a better effect. To be more specific, mostly we reach a mean percentage errors, implying that our method has fewer outliners and thus has a more stable results.",
          "line": 403
        }
      ],
      "review_text": "Table 1: The results of a heuristic approach are shown to be better than those obtained with an exact approach, which is not possible if the model reflects the evaluation criterion.",
      "category": "table-text",
      "description": "The table shows results for heuristic approaches to perform better than exact solutions, which is not possible.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Then Table 1 reflects the percentage error from point to the EF line in the figures. In the table, Linear, Dual, and Diag models as relaxation models cannot always have a good effect on solution generation. But our approach based on these exact solutions, and with heuristics, gains a good effect. At the same time, we introduce more heuristics like Genetic Algorithm (GA), Tabu search(SA), and Simulated Annealing (SA) to compare and verify our approach. Compared with CPLEX optimal solutions and others heuristics (Woodside-Oriakhi et al., 2011), our approach gains a close but mostly a better effect. To be more specific, mostly we reach a mean percentage errors, implying that our method has fewer outliners and thus has a more stable results.",
          "correct": "C9pndmSjg6_8_850f5fdf",
          "incorrect": [
            "C9pndmSjg6_8_table_table3",
            "C9pndmSjg6_8_table_table2",
            "C9pndmSjg6_7_interline-equation_equation29.5"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"optimality\",\"claim\":{\"source\":\"expectation\",\"statement\":\"CPLEX is optimal\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"heuristics outperform CPLEX\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"percentage errors\",\"claim\":{\"source\":\"text\",\"statement\":\"better effect than CPLEX\"},\"evidence\":{\"source\":\"table\",\"statement\":\"CPLEX's errors lower than ours\"}}",
            "{\"letter\":\"D\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"CPLEX should be bolded\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"CPLEX not always bolded\"}}",
            "{\"letter\":\"C\",\"attribute\":\"robustness\",\"claim\":{\"source\":\"expectation\",\"statement\":\"multiple measurements needed\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"one measurement per method\"}}"
          ],
          "letters": ["B", "A", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"performance\",\"target\":\"Table 1\",\"other_involved\":\"CPLEX, heuristic approaches\",\"action\":\"modify\",\"edit_statement\":\"align CPLEX optimality\",\"reason\":\"contradictory values\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"percentage error\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align superiority claim\",\"reason\":\"contradictory values\"}",
            "{\"letter\":\"D\",\"attribute\":\"bolding\",\"target\":\"Table 1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align bolding accuracy\",\"reason\":\"inconsistent highlighting\"}",
            "{\"letter\":\"C\",\"attribute\":\"measurement points\",\"target\":\"methods section\",\"other_involved\":\"Table 1\",\"action\":\"add\",\"edit_statement\":\"add multiple measurements\",\"reason\":\"limited assessment\"}"
          ],
          "letters": ["B", "A", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "C9BA0T3xhq": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "C9BA0T3xhq_7_bdd9b0db",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.6658015016649591,
            "width": 0.6625441696113074,
            "height": 0.2418032786885246
          }
        },
        {
          "type": "text",
          "page": 7,
          "content": "The \u201dpartial\u201d variant, characterized by suboptimal actions and incomplete tasksGupta\net al. (2019), is analyzed in Figure 2. The results indicate that a lower expectile is advantageous in\nthe kitchen-partial-v0 scenario. It supports the learning of conservative Q-values, thereby mitigating\nthe overestimation of actions associated with suboptimal or incomplete trajectories.",
          "line": 355
        }
      ],
      "review_text": "Figures 1 and 2: The conclusion in L358-359 was derived from Figure 2, but all the methods look pretty much the same, which contradicts the stated conclusion.",
      "category": "figure-text",
      "description": "the text concludes that a lower expectile are advantageous, but Figure 2 does not show a clear trend.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "The \u201dpartial\u201d variant, characterized by suboptimal actions and incomplete tasksGupta\net al. (2019), is analyzed in Figure 2. The results indicate that a lower expectile is advantageous in\nthe kitchen-partial-v0 scenario. It supports the learning of conservative Q-values, thereby mitigating\nthe overestimation of actions associated with suboptimal or incomplete trajectories.",
          "correct": "C9BA0T3xhq_7_bdd9b0db",
          "incorrect": [
            "C9BA0T3xhq_6_image_figure1",
            "C9BA0T3xhq_7_interline-equation_equation42",
            "C9BA0T3xhq_8_interline-equation_equation19"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance\",\"claim\":{\"source\":\"text\",\"statement\":\"lower expectile advantageous\"},\"evidence\":{\"source\":\"figure_2b\",\"statement\":\"not superior performance\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Q-values\",\"claim\":{\"source\":\"text\",\"statement\":\"conservative Q-values\"},\"evidence\":{\"source\":\"figure_2a\",\"statement\":\"instability in Q-values\"}}",
            "{\"letter\":\"D\",\"attribute\":\"overestimation\",\"claim\":{\"source\":\"text\",\"statement\":\"higher expectiles beneficial\"},\"evidence\":{\"source\":\"figure_2b\",\"statement\":\"lower expectiles highest rewards\"}}",
            "{\"letter\":\"A\",\"attribute\":\"relevance\",\"claim\":{\"source\":\"figure_2b\",\"statement\":\"irrelevant discussion\"},\"evidence\":{\"source\":\"text\",\"statement\":\"discusses expectiles\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"advantageous expectile\",\"target\":\"text\",\"other_involved\":\"figure_2b\",\"action\":\"modify\",\"edit_statement\":\"revise conclusion\",\"reason\":\"not supported\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"Q-values stability\",\"target\":\"text\",\"other_involved\":\"figure_2a\",\"action\":\"modify\",\"edit_statement\":\"contradict claim\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"beneficial expectiles\",\"target\":\"text\",\"other_involved\":\"figure_2b\",\"action\":\"modify\",\"edit_statement\":\"revise suggestion\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"A\",\"attribute\":\"discussion\",\"target\":\"text\",\"other_involved\":\"figure_2b\",\"action\":\"remove\",\"edit_statement\":\"irrelevant discussion\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "C7XoUdJ5ZC": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "C7XoUdJ5ZC_8_189f047f",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.09681243062670765,
            "width": 0.6572438162544169,
            "height": 0.43442622950819676
          }
        }
      ],
      "review_text": "Table 2 and Table 6: The authors consistently highlight only their performance numbers even if the baselines outperform them, making it hard to read the tables.",
      "category": "table-only",
      "description": "The caption states the best result for each dataset and configuration is highlighted in bold, but sometimes, FLAIR is highlighted without having the best result.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"caption\",\"statement\":\"highlights best accuracy\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"FedProto better for n_s=0.1 and n_s=0.2\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should highlight best accuracy\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"ignores FLAIR\"}}",
            "{\"letter\":\"C\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should highlight best accuracy\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"ignores FLAIR for CIFAR10\"}}",
            "{\"letter\":\"B\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should highlight best accuracy\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"ignores n_s=0.1 and n_s=0.2\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"Test Average Acc values\",\"target\":\"table_2\",\"other_involved\":\"FedProto method\",\"action\":\"modify\",\"edit_statement\":\"remove bolding\",\"reason\":\"FLAIR not highest\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"bolding\",\"target\":\"table_2\",\"other_involved\":\"FLAIR method\",\"action\":\"add\",\"edit_statement\":\"add bolding\",\"reason\":\"highest accuracy missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"bolding\",\"target\":\"table_2\",\"other_involved\":\"FLAIR method, CIFAR10 dataset\",\"action\":\"add\",\"edit_statement\":\"add bolding\",\"reason\":\"best performer ignoring\"}",
            "{\"letter\":\"B\",\"attribute\":\"bolding\",\"target\":\"table_2\",\"other_involved\":\"n_s=0.1, n_s=0.2 columns\",\"action\":\"modify\",\"edit_statement\":\"apply correctly\",\"reason\":\"incorrect column\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "C0Boqhem9u": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "C0Boqhem9u_6_d3170a98",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.09892990028923326,
            "width": 0.6643109540636042,
            "height": 0.319672131147541
          }
        }
      ],
      "review_text": "Figure 3: Panels a and b appear indistinguishable, and panel c shows that while more voxels are activated for the nonlinear model, the average R-squared is lower compared to the linear model.",
      "category": "figure-only",
      "description": "Part (a) and (b) look the same in the figure, besides being supposed to show two different encoders",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"brain activation maps\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should differ\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"visually identical\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"R^2 values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be similar\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"different R^2 values\"}}",
            "{\"letter\":\"C\",\"attribute\":\"legend scale\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"different legend scale\"}}",
            "{\"letter\":\"A\",\"attribute\":\"rendering\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"inconsistent rendering\"}}"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"brain activation maps\",\"target\":\"figure_3a\",\"other_involved\":\"figure_3b\",\"action\":\"modify\",\"edit_statement\":\"show different maps\",\"reason\":\"identical\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"R^2 values\",\"target\":\"figure_3a\",\"other_involved\":\"figure_3b\",\"action\":\"modify\",\"edit_statement\":\"represent R^2 values\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"C\",\"attribute\":\"R^2 scale\",\"target\":\"figure_3b_legend\",\"other_involved\":\"figure_3a_legend\",\"action\":\"modify\",\"edit_statement\":\"align scale\",\"reason\":\"different\"}",
            "{\"letter\":\"A\",\"attribute\":\"brain regions\",\"target\":\"figure_3a\",\"other_involved\":\"figure_3b\",\"action\":\"modify\",\"edit_statement\":\"highlight regions\",\"reason\":\"unclear\"}"
          ],
          "letters": ["D", "B", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "BrqFB8Nl7e": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "BrqFB8Nl7e_8_c564c052",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.10036430984246927,
            "width": 0.6590106007067138,
            "height": 0.209016393442623
          }
        },
        {
          "type": "text",
          "page": 9,
          "content": "The curves show that our method OpenLD consistently outperforms the methods using traditional OOD detection techniques (without using CE ) in terms of accuracy for OOD classes in the OOD class set.",
          "line": 453
        }
      ],
      "review_text": "Minor comments: Line 454 claims that OpenLD consistently outperforms the methods without using C^E. However, this is not true for CIFAR-10 shown in Figure 2.",
      "category": "figure-text",
      "description": "The test's claim that OpenLD always outperforms traditional OOD techniques is not confirmed in Figure 2 Left",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "The curves show that our method OpenLD consistently outperforms the methods using traditional OOD detection techniques (without using CE ) in terms of accuracy for OOD classes in the OOD class set.",
          "correct": "BrqFB8Nl7e_8_c564c052",
          "incorrect": [
            "BrqFB8Nl7e_9_image_figure3",
            "BrqFB8Nl7e_1_image_figure1",
            "BrqFB8Nl7e_16_image_figure4"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"performance\",\"claim\":{\"source\":\"text\",\"statement\":\"OpenLD outperforms traditional OOD detection techniques\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"OpenLD[MD] and OpenLD[Mean] do not always outperform 'without CE' counterparts\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"OpenLD[RMD] performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should achieve higher accuracy than OpenLD[MD]\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"OpenLD[RMD] achieves lower accuracy than OpenLD[MD]\"}}",
            "{\"letter\":\"B\",\"attribute\":\"traditional OOD detection techniques\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be separate models\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"presented as OpenLD variants with '-(CE)' suffix\"}}",
            "{\"letter\":\"C\",\"attribute\":\"OpenLD performance\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should consistently outperform other methods\"},\"evidence\":{\"source\":\"Figure 2 TinyImageNet plot\",\"statement\":\"OpenLD underperforms all other methods\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"performance\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align statement\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"accuracy\",\"target\":\"figure_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"explain difference\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"B\",\"attribute\":\"techniques\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"clarify relationship\",\"reason\":\"confusing\"}",
            "{\"letter\":\"C\",\"attribute\":\"performance\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align statement\",\"reason\":\"contradictory\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "BpyHIrpUOL": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "BpyHIrpUOL_9_a838b530",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.679872523240053,
            "width": 0.6643109540636042,
            "height": 0.24316939890710385
          }
        }
      ],
      "review_text": "Figures 4: The review asks how face attributes differentiate similar digits like '6' and '9', implying a potential inconsistency in the visual representation.",
      "category": "figure-caption",
      "description": "The caption talks about examples where the proposed method correctly predicts the digits shown, but the Figure also shows misclassifications.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"prediction correctness\",\"claim\":{\"source\":\"caption\",\"statement\":\"correctly predicted\"},\"evidence\":{\"source\":\"figure_4\",\"statement\":\"initial prediction in red\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"comparison methods\",\"claim\":{\"source\":\"caption\",\"statement\":\"predictions presented\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"PolyhedronNet labels only\"}}",
            "{\"letter\":\"C\",\"attribute\":\"model versions\",\"claim\":{\"source\":\"caption\",\"statement\":\"face-attributed and blank versions\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"face-attributed models only\"}}",
            "{\"letter\":\"A\",\"attribute\":\"rotation\",\"claim\":{\"source\":\"caption\",\"statement\":\"rotated to show ambiguity\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"static images\"}}"
          ],
          "letters": ["B", "D", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"predictions\",\"target\":\"caption\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"adjust prediction description\",\"reason\":\"misclassification present\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"comparison methods\",\"target\":\"figure_4\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"add comparison results\",\"reason\":\"results missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"blank models\",\"target\":\"figure_4\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"add blank models\",\"reason\":\"models missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"rotation\",\"target\":\"figure_4\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"add rotated models\",\"reason\":\"model missing\"}"
          ],
          "letters": ["B", "D", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "BjZP3fTlVg": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "BjZP3fTlVg_3_d0159208",
          "bbox": {
            "x": 0.11113078518385601,
            "y": 0.20774132567025275,
            "width": 0.7826855123674912,
            "height": 0.4139344262295082
          }
        }
      ],
      "review_text": "Figure 1: The y-axis appears to change despite a fixed x-axis value of 1.0 on the right. The basis for this plot needs further explanation: Is it an extrapolation based on several sample points?",
      "category": "figure-only",
      "description": "The lines in the figure keep changing the value for the y-axis besides the same values for the x-axis.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"x-axis ticks\",\"claim\":{\"source\":\"expectation\",\"statement\":\"unique labels\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"repeated labels\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"y-axis range\",\"claim\":{\"source\":\"expectation\",\"statement\":\"span full range\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"starts at 0.3\"}}",
            "{\"letter\":\"C\",\"attribute\":\"model performance\",\"claim\":{\"source\":\"text\",\"statement\":\"larger more resistant\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"larger less resistant\"}}",
            "{\"letter\":\"B\",\"attribute\":\"figure title\",\"claim\":{\"source\":\"expectation\",\"statement\":\"match caption\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"unrelated to caption\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"x-axis tick marks\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"correct labels\",\"reason\":\"duplicate labels\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"y-axis range\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"correct range\",\"reason\":\"incorrect range\"}",
            "{\"letter\":\"C\",\"attribute\":\"P(Correct) value\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align starting\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"B\",\"attribute\":\"title\",\"target\":\"figure_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"align title\",\"reason\":\"unrelated\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "BeOEmnmyFu": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 16,
          "image_id": "BeOEmnmyFu_16_d73e0f34",
          "bbox": {
            "x": 0.1835689477280256,
            "y": 0.12987247321123635,
            "width": 0.6378091872791518,
            "height": 0.7008196721311477
          }
        }
      ],
      "review_text": "Figure 8: The translation 'Certalfainly! Halerfe alfare stalfealfe' should've been 'Certainly! Here are stee.' according to the rules of the game, but was translated to 'rtainly! halerfe ar stealf.'",
      "category": "figure-only",
      "description": "The translation of GPT-4o answer 'Certalfainly! Halerfe alfare stalfealfe' is incorrect according to alfa balfa rules.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"translation accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be accurate\"},\"evidence\":{\"source\":\"figure_8\",\"statement\":\"inaccurate translation\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"instruction following\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should follow instructions\"},\"evidence\":{\"source\":\"text\",\"statement\":\"failed to follow instructions\"}}",
            "{\"letter\":\"A\",\"attribute\":\"decipherability\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be decipherable\"},\"evidence\":{\"source\":\"text\",\"statement\":\"inherently undecipherable\"}}",
            "{\"letter\":\"C\",\"attribute\":\"prompt conversion\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be correctly converted\"},\"evidence\":{\"source\":\"text\",\"statement\":\"not correctly converted\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"translation\",\"target\":\"GPT-4o output\",\"other_involved\":\"Figure 8\",\"action\":\"modify\",\"edit_statement\":\"fix translation\",\"reason\":\"inaccurate rule\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"response\",\"target\":\"GPT-4o output\",\"other_involved\":\"input prompt\",\"action\":\"modify\",\"edit_statement\":\"add step-by-step instructions\",\"reason\":\"instructions missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"format\",\"target\":\"input prompt\",\"other_involved\":\"GPT-4o output\",\"action\":\"modify\",\"edit_statement\":\"decipher format\",\"reason\":\"undecipherable\"}",
            "{\"letter\":\"C\",\"attribute\":\"question\",\"target\":\"input prompt\",\"other_involved\":\"Malicious question\",\"action\":\"modify\",\"edit_statement\":\"convert question\",\"reason\":\"incorrect format\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 8"]
    }
  ],
  "BUpdp5gETF": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "BUpdp5gETF_6_97fd277d",
          "bbox": {
            "x": 0.24893997246300792,
            "y": 0.6873861844422388,
            "width": 0.49293286219081267,
            "height": 0.10519125683060111
          }
        },
        {
          "type": "text",
          "page": 6,
          "content": "The relative learning rate \u03bb start starts high (3.3 for MoE and 5 for dense) and decays to 0.6. This aggressive early training helps the Embedding stabilize quickly, as it influences the entire network.",
          "line": 279
        }
      ],
      "review_text": "Table 4: The start value of Embedding should be 3.3 according to line 279 to 280, but the table shows a different value.",
      "category": "table-text",
      "description": "The text claims \\lambda to start at a different value for MoE as shown in the Table 4",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "The relative learning rate \u03bb start starts high (3.3 for MoE and 5 for dense) and decays to 0.6. This aggressive early training helps the Embedding stabilize quickly, as it influences the entire network.",
          "correct": "BUpdp5gETF_6_97fd277d",
          "incorrect": [
            "BUpdp5gETF_5_table_table5",
            "BUpdp5gETF_4_table_table3",
            "BUpdp5gETF_4_table_table2"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"relative learning rate\",\"claim\":{\"source\":\"text\",\"statement\":\"starts at 3.3\"},\"evidence\":{\"source\":\"Table 4\",\"statement\":\"does not include 3.3\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"lambda decay\",\"claim\":{\"source\":\"text\",\"statement\":\"decays to 0.6\"},\"evidence\":{\"source\":\"Table 4\",\"statement\":\"shows different end values\"}}",
            "{\"letter\":\"B\",\"attribute\":\"Embedding start value\",\"claim\":{\"source\":\"expectation\",\"statement\":\"starts at 5\"},\"evidence\":{\"source\":\"Table 4\",\"statement\":\"starts less aggressively\"}}",
            "{\"letter\":\"C\",\"attribute\":\"MoE start values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should specify diverse initial values\"},\"evidence\":{\"source\":\"text\",\"statement\":\"states 'high' without specifying\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"relative learning rate \u03bb for MoE\",\"target\":\"text_paragraph_3\",\"other_involved\":\"table_4\",\"action\":\"modify\",\"edit_statement\":\"update value\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"\u03bb decay\",\"target\":\"text_paragraph_3\",\"other_involved\":\"table_4\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"starting value of Embedding \u03bb\",\"target\":\"table_4\",\"other_involved\":\"text_paragraph_3\",\"action\":\"modify\",\"edit_statement\":\"align value\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"initial \u03bb\",\"target\":\"text_paragraph_3\",\"other_involved\":\"table_4\",\"action\":\"add\",\"edit_statement\":\"specific values\",\"reason\":\"missing\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 4"]
    }
  ],
  "AuckJjoD99": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "AuckJjoD99_7_6091a0a7",
          "bbox": {
            "x": 0.19770322334640017,
            "y": 0.1924408459272541,
            "width": 0.607773851590106,
            "height": 0.18989071038251368
          }
        }
      ],
      "review_text": "Table 5: The fine-grained emotion categorization contains logical inconsistencies. For example, 'happy' is inappropriately classified under the 'excite' primary category rather than 'Happy'.",
      "category": "table-only",
      "description": "The fine-grained emotional breakdown is attributed to the primary emotion \"Excite\", whereas it should be assigned to \"Happy\"",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"emotion 'happy'\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be under 'Happy'\"},\"evidence\":{\"source\":\"table_5\",\"statement\":\"under 'Excite'\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"emotion 'spark'\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be distinct\"},\"evidence\":{\"source\":\"table_5\",\"statement\":\"listed under two categories\"}}",
            "{\"letter\":\"B\",\"attribute\":\"emotion categories\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be distinct\"},\"evidence\":{\"source\":\"table_5\",\"statement\":\"overlapping categories\"}}",
            "{\"letter\":\"A\",\"attribute\":\"fine-grained terms\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be common\"},\"evidence\":{\"source\":\"table_5\",\"statement\":\"less common\"}}"
          ],
          "letters": ["D", "C", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"fine-grained emotion 'happy'\",\"target\":\"table_5\",\"other_involved\":null,\"action\":\"reposition\",\"edit_statement\":\"move 'happy' to 'Happy'\",\"reason\":\"incorrect assignment\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"fine-grained emotion 'spark'\",\"target\":\"table_5\",\"other_involved\":null,\"action\":\"remove\",\"edit_statement\":\"resolve duplicate listing\",\"reason\":\"erroneous overlap\"}",
            "{\"letter\":\"B\",\"attribute\":\"fine-grained emotions\",\"target\":\"table_5\",\"other_involved\":null,\"action\":\"reposition\",\"edit_statement\":\"relocate 'annoyed' and 'agitated'\",\"reason\":\"misallocation\"}",
            "{\"letter\":\"A\",\"attribute\":\"fine-grained emotions\",\"target\":\"table_5\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"update terms\",\"reason\":\"less common\"}"
          ],
          "letters": ["D", "C", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 5"]
    }
  ],
  "Aqfwhna1D7": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "Aqfwhna1D7_6_e40a8edc",
          "bbox": {
            "x": 0.19947000779869697,
            "y": 0.2056921453423839,
            "width": 0.606007067137809,
            "height": 0.25000000000000006
          }
        }
      ],
      "review_text": "Figure 3: The figure shows (c) instead of (g) as candidates, which is a contradiction and could cause confusion.",
      "category": "figure-only",
      "description": "The images under the 'Candidates' section are (a), (b), (c). But the answers of GPT4 are (a), (g), (b)",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be displayed\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"not displayed\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"options\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be sufficient\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"not sufficient\"}}",
            "{\"letter\":\"A\",\"attribute\":\"coloring\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"do not match\"}}",
            "{\"letter\":\"D\",\"attribute\":\"prompts\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should differ\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"are the same\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"answer (g)\",\"target\":\"figure_3\",\"other_involved\":\"figure_3\",\"action\":\"remove\",\"edit_statement\":\"remove text\",\"reason\":\"no image\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"candidates\",\"target\":\"figure_3\",\"other_involved\":\"figure_3\",\"action\":\"add\",\"edit_statement\":\"add options\",\"reason\":\"insufficient\"}",
            "{\"letter\":\"A\",\"attribute\":\"coloring\",\"target\":\"figure_3\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"match coloring\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"prompts\",\"target\":\"figure_3\",\"other_involved\":\"figure_3\",\"action\":\"modify\",\"edit_statement\":\"change prompts\",\"reason\":\"same\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "AjunxrcKa2": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 6,
          "image_id": "AjunxrcKa2_6_027e5baf",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.6547358778656507,
            "width": 0.3498233215547703,
            "height": 0.13114754098360656
          }
        }
      ],
      "review_text": "Table 2: The method with lowest average FID is not marked correctly, contradicting the text's claim.",
      "category": "table-caption",
      "description": "The caption states the lower the FID the better and the best results are bolded, but the bolding in the Table is arbitrary",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the table consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the table inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"bolded values\",\"claim\":{\"source\":\"caption\",\"statement\":\"best results bolded\"},\"evidence\":{\"source\":\"table\",\"statement\":\"multiple bolded or not lowest bolded\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"underline\",\"claim\":{\"source\":\"caption\",\"statement\":\"distinct visual markers\"},\"evidence\":{\"source\":\"table\",\"statement\":\"not underlined\"}}",
            "{\"letter\":\"C\",\"attribute\":\"bolded values\",\"claim\":{\"source\":\"caption\",\"statement\":\"best results bolded\"},\"evidence\":{\"source\":\"table\",\"statement\":\"not bolded\"}}",
            "{\"letter\":\"B\",\"attribute\":\"worst results\",\"claim\":{\"source\":\"expectation\",\"statement\":\"mark worst results\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"no guideline\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"bolding criteria\",\"target\":\"table_2\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"bolded values\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"second best results\",\"target\":\"table_2\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"missing markers\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"lowest FID value\",\"target\":\"table_2\",\"other_involved\":\"Average row\",\"action\":\"add\",\"edit_statement\":\"bolding\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"no-Lora column\",\"target\":\"table_2\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"worst result guidelines\",\"reason\":\"missing\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "ARIQfWf4ll": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 3,
          "image_id": "ARIQfWf4ll_3_21cbce8f",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.0981785508452869,
            "width": 0.6749116607773851,
            "height": 0.3920765027322405
          }
        }
      ],
      "review_text": "Figure 1: There is an obvious error in the answer to X-ray image. The box is on the left of the figure, which corresponds to patients\u2019 right lung (not left).",
      "category": "figure-only",
      "description": "Figure 1 (a) should show some example datapoints for the training data, but the training data with the X-Ray image is wrong, as the right and left side of the bounding box on the lung image is flipped.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"bounding box\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match left side\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"flipped\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"fundus photo\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be relevant\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"not relevant\"}}",
            "{\"letter\":\"B\",\"attribute\":\"image data\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be present\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"missing\"}}",
            "{\"letter\":\"D\",\"attribute\":\"treatment\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be complete\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"incomplete\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"bounding box sides\",\"target\":\"figure_1a\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"flip right-left\",\"reason\":\"incorrect\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"fundus photo\",\"target\":\"figure_1a\",\"other_involved\":null,\"action\":\"remove\",\"edit_statement\":\"irrelevant image\",\"reason\":\"not relevant\"}",
            "{\"letter\":\"B\",\"attribute\":\"image data\",\"target\":\"figure_1a\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"missing image\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"D\",\"attribute\":\"treatment steps\",\"target\":\"figure_1a\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"missing step\",\"reason\":\"incomplete\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 1,
      "visual_elements": ["Figure 1"]
    }
  ],
  "A2muypu61H": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "A2muypu61H_8_1d526e6a",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.3566484294953894,
            "width": 0.6590106007067138,
            "height": 0.1516393442622951
          }
        }
      ],
      "review_text": "Table 1: The caption mentions 'The second best results are underlined.', but there are no underlines in Table 1.",
      "category": "table-caption",
      "description": "The caption states that the second-best results are underlined, but no values in Table 1 are underlined",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the table consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the table inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"underlining\",\"claim\":{\"source\":\"caption\",\"statement\":\"second best results are underlined\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"no values are underlined\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"caption\",\"statement\":\"best results are bolded\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"some highest-performing values are not bolded\"}}",
            "{\"letter\":\"D\",\"attribute\":\"dataset specification\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should specify all datasets\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"only specifies CIFAR-100\"}}",
            "{\"letter\":\"C\",\"attribute\":\"naming conventions\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"methods have inconsistent naming conventions\"}}"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"underlining\",\"target\":\"table_1\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"underlining\",\"reason\":\"missing\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"bolding\",\"target\":\"table_1\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"update bolding\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"datasets\",\"target\":\"caption\",\"other_involved\":\"table_1\",\"action\":\"add\",\"edit_statement\":\"add STL10\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"naming conventions\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"align naming across methods\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "B", "D", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "9ljHiYuRHl": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "9ljHiYuRHl_8_2a65e14e",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.09831512951460042,
            "width": 0.6572438162544169,
            "height": 0.2841530054644809
          }
        },
        {
          "type": "image",
          "page": 9,
          "image_id": "9ljHiYuRHl_9_a62cbca8",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.09401181747353143,
            "width": 0.6696113074204946,
            "height": 0.31693989071038253
          }
        }
      ],
      "review_text": "Figure 5(b): Maximum chain length is shown as 9, while Figure 6(b) shows it as 10.",
      "category": "figure-figure",
      "description": "The maximum chain length is 9 in Figure 5, whereas it is 10 in Figure 6",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first figure consistent with the content of the second figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first figure inconsistent with the content of the second figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "9ljHiYuRHl_8_2a65e14e",
          "correct": "9ljHiYuRHl_9_a62cbca8",
          "incorrect": [
            "9ljHiYuRHl_7_image_figure5",
            "9ljHiYuRHl_5_image_figure4",
            "9ljHiYuRHl_4_image_figure3"
          ],
          "letters": ["A", "B", "D", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"x-axis max value\",\"claim\":{\"source\":\"figure_5\",\"statement\":\"9\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"10\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"chain length range\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent range\"},\"evidence\":{\"source\":\"figure_5 and figure_6\",\"statement\":\"different intermediate values\"}}",
            "{\"letter\":\"C\",\"attribute\":\"chain length\",\"claim\":{\"source\":\"figure_5\",\"statement\":\"8\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"9\"}}",
            "{\"letter\":\"B\",\"attribute\":\"x-axis labels\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent labels\"},\"evidence\":{\"source\":\"figure_5 and figure_6\",\"statement\":\"inconsistent labels\"}}"
          ],
          "letters": ["A", "D", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"Chain Length value\",\"target\":\"figure_5\",\"other_involved\":\"figure_6\",\"action\":\"modify\",\"edit_statement\":\"match maximum value\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"Chain Length values\",\"target\":\"figure_6\",\"other_involved\":\"figure_5\",\"action\":\"modify\",\"edit_statement\":\"remove intermediate values\",\"reason\":\"additional\"}",
            "{\"letter\":\"C\",\"attribute\":\"Chain Length display\",\"target\":\"figure_5\",\"other_involved\":\"figure_6\",\"action\":\"modify\",\"edit_statement\":\"align display range\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"x-axis labels\",\"target\":\"figure_5\",\"other_involved\":\"figure_6\",\"action\":\"modify\",\"edit_statement\":\"use consistent labels\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "D", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 5", "Figure 6"]
    }
  ],
  "9DDJuab67K": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "9DDJuab67K_2_fc92b190",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.10043259917712602,
            "width": 0.666077738515901,
            "height": 0.24043715846994537
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "Despite advances in MERC, challenges such as inefficient modal association persist. As illustrated in Figure 1 (a): (1) In the 6th utterance, phrases like \u201dbut no\u201d and \u201dshe\u2019s not my girlfriend\u201d clearly indicate sadness. However, if the model overemphasizes earlier positive expressions like \u201dwe communicate on a daily,\u201d it may incorrectly classify the emotion as happiness. This underscores the risk of focusing on local context while neglecting key emotional cues. (2) In the 3th utterance, the correct label is \u201dexcited,\u201d but dynamic changes in facial expressions and vocal tone might mislead the model to classify it as anger. Such intense emotional variations can be misinterpreted as negative emotions, highlighting the complexity of multimodal data in emotion recognition tasks.",
          "line": 51
        }
      ],
      "review_text": "Line 73: The text states 'the correct label is excited', while the label shown in Figure 1 is 'excitement'.",
      "category": "figure-text",
      "description": "The text mentions the label \"excited\", but the Figure shows \"excitement\"",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Despite advances in MERC, challenges such as inefficient modal association persist. As illustrated in Figure 1 (a): (1) In the 6th utterance, phrases like \u201dbut no\u201d and \u201dshe\u2019s not my girlfriend\u201d clearly indicate sadness. However, if the model overemphasizes earlier positive expressions like \u201dwe communicate on a daily,\u201d it may incorrectly classify the emotion as happiness. This underscores the risk of focusing on local context while neglecting key emotional cues. (2) In the 3th utterance, the correct label is \u201dexcited,\u201d but dynamic changes in facial expressions and vocal tone might mislead the model to classify it as anger. Such intense emotional variations can be misinterpreted as negative emotions, highlighting the complexity of multimodal data in emotion recognition tasks.",
          "correct": "9DDJuab67K_2_fc92b190",
          "incorrect": [
            "9DDJuab67K_3_image_figure2",
            "9DDJuab67K_4_image_figure3",
            "9DDJuab67K_8_image_figure4"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"label\",\"claim\":{\"source\":\"text\",\"statement\":\"excited\"},\"evidence\":{\"source\":\"Figure 1(a)\",\"statement\":\"[excitement]\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"label\",\"claim\":{\"source\":\"text\",\"statement\":\"sadness\"},\"evidence\":{\"source\":\"Figure 1(a)\",\"statement\":\"[frustration]\"}}",
            "{\"letter\":\"C\",\"attribute\":\"label\",\"claim\":{\"source\":\"text\",\"statement\":\"happy\"},\"evidence\":{\"source\":\"Figure 1(a)\",\"statement\":\"[neutral]\"}}",
            "{\"letter\":\"D\",\"attribute\":\"label\",\"claim\":{\"source\":\"text\",\"statement\":\"sad\"},\"evidence\":{\"source\":\"Figure 1(a)\",\"statement\":\"[neutral]\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"label\",\"target\":\"figure_1a\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"change label\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"label\",\"target\":\"figure_1a\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"change label\",\"reason\":\"different\"}",
            "{\"letter\":\"C\",\"attribute\":\"label\",\"target\":\"figure_1a\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"change label\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"label\",\"target\":\"figure_1a\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"change label\",\"reason\":\"different\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    },
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "9DDJuab67K_8_60d59563",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.09722225001600923,
            "width": 0.6572438162544169,
            "height": 0.20765027322404372
          }
        },
        {
          "type": "text",
          "page": 8,
          "content": "Tables 1 and 2 represent a comparative analysis of performance metrics for the baseline models on the IEMOCAP and MELD datasets. On the IEMOCAP dataset, the proposed SUMMER framework achieves a 2.61% improvement in w-ACC and 2.15% in w-F1, surpassing baselines like CHFusion, particularly in minority classes such as \u201dexcitement.",
          "line": 406
        }
      ],
      "review_text": "Line 411: The text mentions that the proposed method surpasses baselines like CHFusion, particularly in minority classes such as 'excitement', but Table 1 does not provide any class-specific results for CHFusion.",
      "category": "table-text",
      "description": "The Table does not show results for CHFusion, especially not for the minority classes, but the text claims there are results.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "Tables 1 and 2 represent a comparative analysis of performance metrics for the baseline models on the IEMOCAP and MELD datasets. On the IEMOCAP dataset, the proposed SUMMER framework achieves a 2.61% improvement in w-ACC and 2.15% in w-F1, surpassing baselines like CHFusion, particularly in minority classes such as \u201dexcitement.",
          "correct": "9DDJuab67K_8_60d59563",
          "incorrect": [
            "9DDJuab67K_8_table_table4",
            "9DDJuab67K_8_table_table3",
            "9DDJuab67K_7_table_table2"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"minority class performance\",\"claim\":{\"source\":\"text\",\"statement\":\"SUMMER surpasses CHFusion\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"no CHFusion metrics\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"CHFusion results\",\"claim\":{\"source\":\"expectation\",\"statement\":\"results should be present\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"no CHFusion results\"}}",
            "{\"letter\":\"A\",\"attribute\":\"overall w-ACC and w-F1\",\"claim\":{\"source\":\"text\",\"statement\":\"SUMMER better than CHFusion\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"CHFusion better than Student Model\"}}",
            "{\"letter\":\"D\",\"attribute\":\"model category\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be baseline\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"listed as Teacher Model\"}}"
          ],
          "letters": ["C", "B", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"performance metrics\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add CHFusion metrics\",\"reason\":\"missing data\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"results\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add CHFusion results\",\"reason\":\"missing data\"}",
            "{\"letter\":\"A\",\"attribute\":\"overall w-ACC and w-F1\",\"target\":\"text\",\"other_involved\":\"Table 1\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"D\",\"attribute\":\"model categorization\",\"target\":\"Table 1\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align category\",\"reason\":\"misclassification\"}"
          ],
          "letters": ["C", "B", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "97tbbvSJ4A": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "97tbbvSJ4A_5_f7f673d7",
          "bbox": {
            "x": 0.17296824101424468,
            "y": 0.10842441079395065,
            "width": 0.6537102473498233,
            "height": 0.3811475409836066
          }
        },
        {
          "type": "text",
          "page": 9,
          "content": "The experimental results depicted in Figure 2 reveal several critical insights. Our proposed PDF smoothing model consistently outperforms DP-SGD under the same backbone and experiment settings and has a narrow gap with non-private model. This indicates that our method effectively balances the trade-off between privacy and utility. As expected, higher values of \u03b5 correlate with improved model accuracy due to reduced smoothing factor. This is a well-documented phenomenon in differential privacy literature. ",
          "line": 446
        }
      ],
      "review_text": "Figure 2: The figure is incorrectly referenced in Section 4.4. (I suppose the reference should point to Figure 3?)",
      "category": "figure-text",
      "description": "The text talks about Figure 2 showing experimental results (performances), but Figure 2 is unrelated to that.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "The experimental results depicted in Figure 2 reveal several critical insights. Our proposed PDF smoothing model consistently outperforms DP-SGD under the same backbone and experiment settings and has a narrow gap with non-private model. This indicates that our method effectively balances the trade-off between privacy and utility. As expected, higher values of \u03b5 correlate with improved model accuracy due to reduced smoothing factor. This is a well-documented phenomenon in differential privacy literature. ",
          "correct": "97tbbvSJ4A_5_f7f673d7",
          "incorrect": [
            "97tbbvSJ4A_9_image_figure3",
            "97tbbvSJ4A_1_image_figure1",
            "97tbbvSJ4A_5_interline-equation_equation39.5"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"figure purpose\",\"claim\":{\"source\":\"text\",\"statement\":\"experimental results\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"Adaptive Kernel PDF Representation Learning Framework\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"accuracy comparison\",\"claim\":{\"source\":\"text\",\"statement\":\"DP-SGD outperformed by PDF smoothing\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"DP-SGD higher accuracy\"}}",
            "{\"letter\":\"C\",\"attribute\":\"privacy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"shouldn't show intimate images\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"shows X-Ray images\"}}",
            "{\"letter\":\"B\",\"attribute\":\"naming\",\"claim\":{\"source\":\"text\",\"statement\":\"PDF smoothing model\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"PDF representation learning\"}}"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"content\",\"target\":\"figure_2\",\"other_involved\":\"text, caption_2\",\"action\":\"modify\",\"edit_statement\":\"align experimental results\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"accuracy\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align DP-SGD performance\",\"reason\":\"contradictory\"}",
            "{\"letter\":\"C\",\"attribute\":\"privacy\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"remove\",\"edit_statement\":\"X-Ray images\",\"reason\":\"irrelevant\"}",
            "{\"letter\":\"B\",\"attribute\":\"model name\",\"target\":\"figure_2\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align 'PDF smoothing model'\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "A", "C", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "8Lt27D1qhE": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 17,
          "image_id": "8Lt27D1qhE_17_0e45918d",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.09278235930562671,
            "width": 0.6643109540636042,
            "height": 0.1598360655737705
          }
        },
        {
          "type": "text",
          "page": 2,
          "content": "For instances that are difficult to predict, such as pictures, bookshelfs, the quality of the mask corresponding to the matched query is poor.",
          "line": 104
        }
      ],
      "review_text": "Table 13 of the Appendix: The accuracy scores for objects like picture and bookshelf are higher than others, contradicting the authors' claim that these objects are difficult to predict.",
      "category": "table-text",
      "description": "The text claims bookshelfs are hard to predict, but the Table shows the performance to be among the best for this label, making it seem easier to predict than other labels.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "For instances that are difficult to predict, such as pictures, bookshelfs, the quality of the mask corresponding to the matched query is poor.",
          "correct": "8Lt27D1qhE_17_0e45918d",
          "incorrect": [
            "8Lt27D1qhE_18_table_table17",
            "8Lt27D1qhE_16_table_table15",
            "8Lt27D1qhE_16_table_table14"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"mAP\",\"claim\":{\"source\":\"expectation\",\"statement\":\"low mAP for bookshelf\"},\"evidence\":{\"source\":\"table_13\",\"statement\":\"highest mAP for bookshelf\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"mAP\",\"claim\":{\"source\":\"text\",\"statement\":\"pictures hard\"},\"evidence\":{\"source\":\"table_13\",\"statement\":\"no low mAP for picture\"}}",
            "{\"letter\":\"A\",\"attribute\":\"performance\",\"claim\":{\"source\":\"table_13\",\"statement\":\"lowest on bookshelf\"},\"evidence\":{\"source\":\"table_13\",\"statement\":\"not lowest on bookshelf\"}}",
            "{\"letter\":\"B\",\"attribute\":\"category\",\"claim\":{\"source\":\"text\",\"statement\":\"bookshelfs\"},\"evidence\":{\"source\":\"table_13\",\"statement\":\"no bookshelfs\"}}"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"difficulty of bookshelf\",\"target\":\"text\",\"other_involved\":\"table_13\",\"action\":\"modify\",\"edit_statement\":\"align claim\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"difficulty of picture\",\"target\":\"text\",\"other_involved\":\"table_13\",\"action\":\"modify\",\"edit_statement\":\"align claim\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"A\",\"attribute\":\"performance on bookshelf\",\"target\":\"table_13\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align explanation\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"B\",\"attribute\":\"category bookshelf\",\"target\":\"table_13\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"add category\",\"reason\":\"missing\"}"
          ],
          "letters": ["D", "C", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 13"]
    }
  ],
  "8GMUa79ZKc": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "8GMUa79ZKc_5_3c514f9a",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.09858837023458845,
            "width": 0.6713780918727914,
            "height": 0.2718579234972678
          }
        },
        {
          "type": "image",
          "page": 5,
          "image_id": "8GMUa79ZKc_5_7c0d4ed6",
          "bbox": {
            "x": 0.44681983112025175,
            "y": 0.7280965502796277,
            "width": 0.38869257950530034,
            "height": 0.024590163934426233
          }
        }
      ],
      "review_text": "Figure 3: The calculated value of m* does not seem to correspond correctly to the description in Equation (3).",
      "category": "figure-equation",
      "description": "Equation (3) computes m^* as the element-wise product of w and m, whereas Figure 3 shows it as the cross product of w and m",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the equation?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the equation?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "8GMUa79ZKc_5_3c514f9a",
          "correct": "8GMUa79ZKc_5_7c0d4ed6",
          "incorrect": [
            "8GMUa79ZKc_4_interline-equation_equation28",
            "8GMUa79ZKc_4_interline-equation_equation10.5",
            "8GMUa79ZKc_6_interline-equation_equation8"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"operation symbol\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"(3) and figure_3\",\"statement\":\"inconsistent\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"operation\",\"claim\":{\"source\":\"figure_3\",\"statement\":\"element-wise product\"},\"evidence\":{\"source\":\"(3)\",\"statement\":\"cross product\"}}",
            "{\"letter\":\"A\",\"attribute\":\"normalization\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be mentioned\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"not mentioned\"}}",
            "{\"letter\":\"C\",\"attribute\":\"values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be obtainable\"},\"evidence\":{\"source\":\"figure_3\",\"statement\":\"not obtainable\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"operation_symbol\",\"target\":\"figure_3\",\"other_involved\":\"equation_3\",\"action\":\"replace\",\"edit_statement\":\"substitute symbol\",\"reason\":\"conflict\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"m_star_operation\",\"target\":\"figure_3\",\"other_involved\":\"equation_3\",\"action\":\"replace\",\"edit_statement\":\"substitute symbol\",\"reason\":\"conflict\"}",
            "{\"letter\":\"A\",\"attribute\":\"normalization\",\"target\":\"figure_3\",\"other_involved\":\"equation_3\",\"action\":\"modify\",\"edit_statement\":\"add explicit mention\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"numerical_values\",\"target\":\"figure_3\",\"other_involved\":\"equation_3\",\"action\":\"modify\",\"edit_statement\":\"correct values\",\"reason\":\"impossible\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3", "(3)"]
    }
  ],
  "8EaDOGMPUL": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 4,
          "image_id": "8EaDOGMPUL_4_b099ed6a",
          "bbox": {
            "x": 0.16766788765735421,
            "y": 0.09230416720030739,
            "width": 0.6625441696113074,
            "height": 0.2677595628415301
          }
        }
      ],
      "review_text": "Fig.4 caption: The figure shows only one face image as output, while the caption mentions the diffusion model generates 'six views of global full-body images and local face images'.",
      "category": "figure-caption",
      "description": "The caption states the diffusion model generates 'six views of global full-body images and local face images', but only two face views can be seen.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"number of face images\",\"claim\":{\"source\":\"caption\",\"statement\":\"local face images\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"two face images\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"number of views\",\"claim\":{\"source\":\"caption\",\"statement\":\"six views\"},\"evidence\":{\"source\":\"diagram\",\"statement\":\"three views\"}}",
            "{\"letter\":\"C\",\"attribute\":\"output label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be images\"},\"evidence\":{\"source\":\"diagram\",\"statement\":\"not images\"}}",
            "{\"letter\":\"A\",\"attribute\":\"number of stages\",\"claim\":{\"source\":\"caption\",\"statement\":\"two stages\"},\"evidence\":{\"source\":\"diagram\",\"statement\":\"three stages\"}}"
          ],
          "letters": ["D", "B", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"local face images count\",\"target\":\"figure_4_caption\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"update image count\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"global full-body views count\",\"target\":\"figure_4_caption\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"update view count\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"C\",\"attribute\":\"multiview output description\",\"target\":\"figure_4_caption\",\"other_involved\":\"figure_4\",\"action\":\"add\",\"edit_statement\":\"add 'images' clarification\",\"reason\":\"missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"stages number\",\"target\":\"figure_4_caption\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"update stage count\",\"reason\":\"mismatch\"}"
          ],
          "letters": ["D", "B", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "7vH8DO2oPk": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "7vH8DO2oPk_5_7a78d1f6",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.3063752221279457,
            "width": 0.6643109540636042,
            "height": 0.19945355191256833
          }
        },
        {
          "type": "text",
          "page": 5,
          "content": "Furthermore, our results in Figure 4 (a) demonstrate that the MLP is indeed nearing convergence throughout the MEL.",
          "line": 269
        }
      ],
      "review_text": "Line 269: The text states 'Figure 4(a)...' but the figure shows the cosine similarity between MLPs, not the convergence of MLPs.",
      "category": "figure-text",
      "description": "Figure 4 (a) shows the cosine similarity between MLPs, not the convergence of the MLP throughout MEL as stated in the text.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "Furthermore, our results in Figure 4 (a) demonstrate that the MLP is indeed nearing convergence throughout the MEL.",
          "correct": "7vH8DO2oPk_5_7a78d1f6",
          "incorrect": [
            "7vH8DO2oPk_4_image_figure3",
            "7vH8DO2oPk_3_image_figure2",
            "7vH8DO2oPk_7_image_figure5"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"label matches content\"},\"evidence\":{\"source\":\"figure_4(a)\",\"statement\":\"Cosine Between MLPs\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"metric\",\"claim\":{\"source\":\"text\",\"statement\":\"Cosine Between MLPs\"},\"evidence\":{\"source\":\"figure_4(a)\",\"statement\":\"L2 Distance Between MLPs\"}}",
            "{\"letter\":\"B\",\"attribute\":\"dataset\",\"claim\":{\"source\":\"text\",\"statement\":\"convergence across both datasets\"},\"evidence\":{\"source\":\"figure_4(a)\",\"statement\":\"Amazon dataset only\"}}",
            "{\"letter\":\"A\",\"attribute\":\"epochs\",\"claim\":{\"source\":\"text\",\"statement\":\"convergence throughout MEL\"},\"evidence\":{\"source\":\"figure_4(a)\",\"statement\":\"data up to 15 epochs\"}}"
          ],
          "letters": ["D", "C", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"metric type\",\"target\":\"figure_4a\",\"other_involved\":\"text\",\"action\":\"replace\",\"edit_statement\":\"L2 Distance\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"metric name\",\"target\":\"text\",\"other_involved\":\"figure_4a\",\"action\":\"replace\",\"edit_statement\":\"L2 Distance\",\"reason\":\"incorrect\"}",
            "{\"letter\":\"B\",\"attribute\":\"datasets shown\",\"target\":\"figure_4a\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"include Taobao data\",\"reason\":\"incomplete\"}",
            "{\"letter\":\"A\",\"attribute\":\"epochs\",\"target\":\"figure_4a\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"show full range\",\"reason\":\"not fully depicted\"}"
          ],
          "letters": ["D", "C", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "70lFRMBygi": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 14,
          "image_id": "70lFRMBygi_14_b6efbca9",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.10132061067174694,
            "width": 0.6643109540636042,
            "height": 0.3265027322404372
          }
        }
      ],
      "review_text": "Figure 4: The content and description are very confusing. It's unclear whether it's KUL or KUL and DTU, and whether it's 0.1s or 1s. The well-performing baseline model results from Table 3 are not included in Figure 4.",
      "category": "figure-caption",
      "description": "The caption mentions a 1s decision window, but the title of the figure mentions a 0.1s decision window",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"decision window\",\"claim\":{\"source\":\"title\",\"statement\":\"0.1s\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"1s\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"decision window\",\"claim\":{\"source\":\"caption\",\"statement\":\"0.1s\"},\"evidence\":{\"source\":\"title\",\"statement\":\"1s\"}}",
            "{\"letter\":\"C\",\"attribute\":\"accuracy\",\"claim\":{\"source\":\"expectation\",\"statement\":\"below 100%\"},\"evidence\":{\"source\":\"Figure 4\",\"statement\":\"over 100%\"}}",
            "{\"letter\":\"A\",\"attribute\":\"models\",\"claim\":{\"source\":\"caption\",\"statement\":\"enumerated models\"},\"evidence\":{\"source\":\"x-axis\",\"statement\":\"not enumerated\"}}"
          ],
          "letters": ["B", "D", "C", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"decision window\",\"target\":\"figure_4\",\"other_involved\":\"caption\",\"action\":\"modify\",\"edit_statement\":\"update to caption\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"decision window\",\"target\":\"caption\",\"other_involved\":\"figure_4\",\"action\":\"modify\",\"edit_statement\":\"update to title\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"C\",\"attribute\":\"accuracies\",\"target\":\"figure_4\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"limit 100%\",\"reason\":\"limit above 100%\"}",
            "{\"letter\":\"A\",\"attribute\":\"models\",\"target\":\"caption\",\"other_involved\":\"figure_4\",\"action\":\"add\",\"edit_statement\":\"models enumerated\",\"reason\":\"missing\"}"
          ],
          "letters": ["B", "D", "C", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 4"]
    }
  ],
  "6w2HEMxzq7": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "6w2HEMxzq7_5_aa2cdcf1",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.11313750053364073,
            "width": 0.6625441696113074,
            "height": 0.2978142076502732
          }
        }
      ],
      "review_text": "Figure 2 step 1: There seems to be no change in the graph before and after denoising, is it a mistake or done on purpose?",
      "category": "figure-only",
      "description": "After the denoising step in Figure 2, the frequency of noise and normal changed, but the graph looks the same.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"graphical representation\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should change\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"remains the same\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"frequency sum\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be 100%\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"not 100%\"}}",
            "{\"letter\":\"B\",\"attribute\":\"noise percentage\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should decrease\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"increases\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Denoising Step application\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should apply to Denoised Encoder\"},\"evidence\":{\"source\":\"Figure 2\",\"statement\":\"applies to Matching Encoder\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"nodes\",\"target\":\"figure_2\",\"other_involved\":\"bar charts\",\"action\":\"modify\",\"edit_statement\":\"update connections\",\"reason\":\"identical representation\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"numerical values\",\"target\":\"bar charts\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"sum to 100%\",\"reason\":\"incorrect sum\"}",
            "{\"letter\":\"B\",\"attribute\":\"noise percentage\",\"target\":\"bar chart\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"reduce noise\",\"reason\":\"incorrect increase\"}",
            "{\"letter\":\"D\",\"attribute\":\"denoising step\",\"target\":\"figure_2\",\"other_involved\":\"matching encoder, denoised encoder\",\"action\":\"reposition\",\"edit_statement\":\"apply to denoised encoder\",\"reason\":\"incorrect application\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2"]
    }
  ],
  "6D30aOdh2U": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "6D30aOdh2U_9_b67f9853",
          "bbox": {
            "x": 0.1659011032050574,
            "y": 0.09353370874957309,
            "width": 0.6731448763250883,
            "height": 0.34016393442622955
          }
        }
      ],
      "review_text": "Figure 5: The proposed method's results are worse than NADA's in representing certain styles (e.g., green color + wooden texture, exaggerated nose and ears).",
      "category": "figure-caption",
      "description": "The caption claims that the proposed method maintains robust consistency with the source domain, but the generated images show different characters than the source image.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"consistency\",\"claim\":{\"source\":\"caption\",\"statement\":\"maintains robust consistency\"},\"evidence\":{\"source\":\"figure_10\",\"statement\":\"change identity\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"diversity\",\"claim\":{\"source\":\"caption\",\"statement\":\"lose diversity\"},\"evidence\":{\"source\":\"figure_10\",\"statement\":\"diverse expressions\"}}",
            "{\"letter\":\"A\",\"attribute\":\"consistency\",\"claim\":{\"source\":\"text\",\"statement\":\"maintains robust consistency\"},\"evidence\":{\"source\":\"figure_10\",\"statement\":\"inconsistent shades and textures\"}}",
            "{\"letter\":\"B\",\"attribute\":\"overfitting\",\"claim\":{\"source\":\"caption\",\"statement\":\"tends to overfit\"},\"evidence\":{\"source\":\"figure_10\",\"statement\":\"diverse characters\"}}"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"identity\",\"target\":\"figure_10\",\"other_involved\":\"UniHDA description\",\"action\":\"modify\",\"edit_statement\":\"character identity\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"diversity description\",\"target\":\"figure_10_caption\",\"other_involved\":\"figure_10\",\"action\":\"modify\",\"edit_statement\":\"diversity description\",\"reason\":\"contradicted\"}",
            "{\"letter\":\"A\",\"attribute\":\"red hair images\",\"target\":\"figure_10\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"consistency red hair images\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"overfitting description\",\"target\":\"figure_10_caption\",\"other_involved\":\"figure_10\",\"action\":\"modify\",\"edit_statement\":\"overfitting description\",\"reason\":\"contradicted\"}"
          ],
          "letters": ["D", "C", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 10"]
    }
  ],
  "5nldnvvHfw": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "5nldnvvHfw_5_af44d6d9",
          "bbox": {
            "x": 0.25070675691530475,
            "y": 0.7931238393314549,
            "width": 0.5848056537102473,
            "height": 0.05327868852459017
          }
        },
        {
          "type": "text",
          "page": 5,
          "content": "It is shown that the regret of AdamE is upper bounded by O(\u221aT ), which is similar to Adam and its variants.",
          "line": 266
        }
      ],
      "review_text": "Theorem 2.2: The second term of formula (14) is actually of order O(T) instead of O(\u221aT), contradicting the claimed O(\u221aT) regret bound.",
      "category": "equation-text",
      "description": "The text claims the complexity is O($\\sqrt{T}$) for the regret, but Equation (14) shows complexity O(T).",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the equation consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the equation inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "It is shown that the regret of AdamE is upper bounded by O(\u221aT ), which is similar to Adam and its variants.",
          "correct": "5nldnvvHfw_5_af44d6d9",
          "incorrect": [
            "5nldnvvHfw_4_interline-equation_equation12.5",
            "5nldnvvHfw_6_interline-equation_equation19",
            "5nldnvvHfw_6_interline-equation_equation24"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"regret complexity\",\"claim\":{\"source\":\"text\",\"statement\":\"O($\\\\sqrt{T}$)\"},\"evidence\":{\"source\":\"(14)\",\"statement\":\"O(T)\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"regret complexity\",\"claim\":{\"source\":\"text\",\"statement\":\"O($\\\\sqrt{T}$)\"},\"evidence\":{\"source\":\"(14)\",\"statement\":\"quadratic\"}}",
            "{\"letter\":\"D\",\"attribute\":\"regret complexity\",\"claim\":{\"source\":\"text\",\"statement\":\"O($\\\\sqrt{T}$)\"},\"evidence\":{\"source\":\"(14)\",\"statement\":\"O(n log n)\"}}",
            "{\"letter\":\"B\",\"attribute\":\"regret complexity\",\"claim\":{\"source\":\"expectation\",\"statement\":\"O($\\\\sqrt{T}$)\"},\"evidence\":{\"source\":\"(14)\",\"statement\":\"O(T^3)\"}}"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"regret bound\",\"target\":\"equation_14\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update complexity\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"regret complexity\",\"target\":\"equation_14\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update complexity\",\"reason\":\"different\"}",
            "{\"letter\":\"D\",\"attribute\":\"regret complexity\",\"target\":\"equation_14\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"update complexity\",\"reason\":\"different\"}",
            "{\"letter\":\"B\",\"attribute\":\"complexity\",\"target\":\"equation_14\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"missing terms\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "C", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["(14)"]
    }
  ],
  "5dDYhvt6dY": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "5dDYhvt6dY_2_20156829",
          "bbox": {
            "x": 0.1659011032050574,
            "y": 0.09968124973317966,
            "width": 0.6643109540636042,
            "height": 0.505464480874317
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "We present three modifications to enhance the efficiency and performance of the encoder-decoder Transformer architecture. Firstly, we reason that the addition of the token and positional embedding matrix may cause loss of information. To address this, we concatenate the token and positional embedding matrices before the initial encoder and decoder blocks, as shown in Figure 1 (b). Second, we normalize the token embedding matrix across tokens, as shown in Figure 1 (c).",
          "line": 38
        }
      ],
      "review_text": "Figure 1 (b) and 1 (c) have been wrongly reversed in L41-42, which contradicts the intended order of presentation.",
      "category": "figure-text",
      "description": "The references in the text to Figure 1 (b) and Figure 1 (c) should be swapped: what the text calls Figure 1 (b) is panel (c) in the figure, and vice versa.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "We present three modifications to enhance the efficiency and performance of the encoder-decoder Transformer architecture. Firstly, we reason that the addition of the token and positional embedding matrix may cause loss of information. To address this, we concatenate the token and positional embedding matrices before the initial encoder and decoder blocks, as shown in Figure 1 (b). Second, we normalize the token embedding matrix across tokens, as shown in Figure 1 (c).",
          "correct": "5dDYhvt6dY_2_20156829",
          "incorrect": [
            "5dDYhvt6dY_2_image_figure2",
            "5dDYhvt6dY_6_image_figure3",
            "5dDYhvt6dY_2_interline-equation_equation18"
          ],
          "letters": ["A", "B", "C", "D"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"labeling\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent labeling\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"inconsistent labeling\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"modifications\",\"claim\":{\"source\":\"text\",\"statement\":\"three modifications\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"two diagrams\"}}",
            "{\"letter\":\"A\",\"attribute\":\"representation\",\"claim\":{\"source\":\"text\",\"statement\":\"two separate matrices\"},\"evidence\":{\"source\":\"Figure 1(a)\",\"statement\":\"single matrix\"}}",
            "{\"letter\":\"B\",\"attribute\":\"concatenation process\",\"claim\":{\"source\":\"text\",\"statement\":\"before initial blocks\"},\"evidence\":{\"source\":\"Figure 1(b)\",\"statement\":\"after initial blocks\"}}"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"concatenation and normalization\",\"target\":\"figure_1\",\"other_involved\":\"text\",\"action\":\"replace\",\"edit_statement\":\"swap labels\",\"reason\":\"reversed\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"modifications\",\"target\":\"text\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"include third modification\",\"reason\":\"missing\"}",
            "{\"letter\":\"A\",\"attribute\":\"matrices\",\"target\":\"figure_1a\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"separate matrices\",\"reason\":\"merged\"}",
            "{\"letter\":\"B\",\"attribute\":\"concatenation process\",\"target\":\"figure_1b\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"reposition concatenation\",\"reason\":\"misplaced\"}"
          ],
          "letters": ["D", "C", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "4y6Q98hJzr": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 17,
          "image_id": "4y6Q98hJzr_17_8de20f47",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.4525500855159239,
            "width": 0.6572438162544169,
            "height": 0.24590163934426232
          }
        }
      ],
      "review_text": "Figure 6b: The caption does not seem to be correct. The figure seems to show accuracy during law continual pretraining, while the caption is about relative parameter updates during the medical continual pretraining process.",
      "category": "figure-caption",
      "description": "The caption states (b) shows the relative parameter update during the pre-training process, but (b) seems to show accuracy",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"plot label\",\"claim\":{\"source\":\"caption\",\"statement\":\"relative parameter update\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"shows performance\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"plot content\",\"claim\":{\"source\":\"caption\",\"statement\":\"depicts performance\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"shows relative parameter updates\"}}",
            "{\"letter\":\"A\",\"attribute\":\"x-axis label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should specify total tokens\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"does not specify total tokens\"}}",
            "{\"letter\":\"B\",\"attribute\":\"baseline\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should match caption\"},\"evidence\":{\"source\":\"caption and legend\",\"statement\":\"do not match\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"caption content\",\"target\":\"caption\",\"other_involved\":\"figure_6b\",\"action\":\"modify\",\"edit_statement\":\"align description\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"caption content\",\"target\":\"caption\",\"other_involved\":\"figure_6b\",\"action\":\"modify\",\"edit_statement\":\"align description\",\"reason\":\"incorrect\"}",
            "{\"letter\":\"A\",\"attribute\":\"tokens\",\"target\":\"figure_6b\",\"other_involved\":\"caption\",\"action\":\"add\",\"edit_statement\":\"total number\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"baseline\",\"target\":\"caption\",\"other_involved\":\"figure_6b\",\"action\":\"modify\",\"edit_statement\":\"update description\",\"reason\":\"different\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "4vm6Nn2DW9": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "4vm6Nn2DW9_7_1a04e9b5",
          "bbox": {
            "x": 0.16943467210965105,
            "y": 0.09879323823855876,
            "width": 0.6607773851590105,
            "height": 0.27868852459016397
          }
        }
      ],
      "review_text": "Figure 1 and Figure 8: Both figures report results on 5 datasets, but the figure caption and paper introduce there are only 4 datasets instead of 5.",
      "category": "figure-caption",
      "description": "The caption states there are four datasets, but the Figure shows 5 datasets",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the figure consistent with the content of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the figure inconsistent with the content of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"number of datasets\",\"claim\":{\"source\":\"caption\",\"statement\":\"four datasets\"},\"evidence\":{\"source\":\"figure_1\",\"statement\":\"five datasets\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"number of methods\",\"claim\":{\"source\":\"caption\",\"statement\":\"4 methods\"},\"evidence\":{\"source\":\"legend\",\"statement\":\"8 methods\"}}",
            "{\"letter\":\"D\",\"attribute\":\"x-axis label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"match caption\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"pixel-level missing\"}}",
            "{\"letter\":\"A\",\"attribute\":\"missing data conditions\",\"claim\":{\"source\":\"caption\",\"statement\":\"pixel-level missing\"},\"evidence\":{\"source\":\"figure\",\"statement\":\"MAR and MNAR\"}}"
          ],
          "letters": ["C", "B", "D", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"number of datasets\",\"target\":\"caption\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"update number\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"number of methods\",\"target\":\"caption\",\"other_involved\":\"legend\",\"action\":\"modify\",\"edit_statement\":\"update number\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"D\",\"attribute\":\"missing data description\",\"target\":\"caption\",\"other_involved\":\"figure_1 x-axis\",\"action\":\"modify\",\"edit_statement\":\"align terminology\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"missing data conditions\",\"target\":\"caption\",\"other_involved\":\"figure_1\",\"action\":\"modify\",\"edit_statement\":\"name conditions\",\"reason\":\"not specified\"}"
          ],
          "letters": ["C", "B", "D", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "4WsHgA8EG1": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "4WsHgA8EG1_7_34091f12",
          "bbox": {
            "x": 0.17120145656194785,
            "y": 0.09162110709101777,
            "width": 0.6607773851590105,
            "height": 0.24590163934426232
          }
        }
      ],
      "review_text": "Table 3: Misuse of bold texts, some are not best results, which contradicts the claim of presenting the best results.",
      "category": "table-caption",
      "description": "The caption claims the best result is bolded, but sometimes the best result is not bolded",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the table consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the table inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"caption\",\"statement\":\"best results are bolded\"},\"evidence\":{\"source\":\"table_3\",\"statement\":\"best results not bolded\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"underlining\",\"claim\":{\"source\":\"expectation\",\"statement\":\"represent best results\"},\"evidence\":{\"source\":\"table_3\",\"statement\":\"not highest value\"}}",
            "{\"letter\":\"A\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be clear\"},\"evidence\":{\"source\":\"caption\",\"statement\":\"ambiguous definition\"}}",
            "{\"letter\":\"C\",\"attribute\":\"bolding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent application\"},\"evidence\":{\"source\":\"table_3\",\"statement\":\"inconsistent between backbones\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"bolding\",\"target\":\"table_3\",\"other_involved\":\"table_3_caption\",\"action\":\"add\",\"edit_statement\":\"missing bolding\",\"reason\":\"contradiction\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"underlining\",\"target\":\"table_3\",\"other_involved\":null,\"action\":\"remove\",\"edit_statement\":\"underlining\",\"reason\":\"misrepresentation\"}",
            "{\"letter\":\"A\",\"attribute\":\"definition\",\"target\":\"table_3_caption\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"clarify best results\",\"reason\":\"ambiguous\"}",
            "{\"letter\":\"C\",\"attribute\":\"bolding\",\"target\":\"table_3\",\"other_involved\":\"miniGPT4,BLIP2-OPT\",\"action\":\"modify\",\"edit_statement\":\"bolding application\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 3"]
    }
  ],
  "3f8556SIEn": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "3f8556SIEn_5_f998c26e",
          "bbox": {
            "x": 0.170873786407767,
            "y": 0.08360898584351505,
            "width": 0.6640776699029126,
            "height": 0.43759398496240604
          }
        },
        {
          "type": "image",
          "page": 5,
          "image_id": "3f8556SIEn_5_0551acc8",
          "bbox": {
            "x": 0.3009708737864078,
            "y": 0.6140601136630639,
            "width": 0.5320388349514563,
            "height": 0.02706766917293233
          }
        }
      ],
      "review_text": "Figure 2: inconsistenct notation $M_{\\textrm{tgt}}$ vs. notation in text $M^{\\textrm{tgt}}$.",
      "category": "figure-equation",
      "description": "The notation $M_{\\textrm{tgt}}$ in Figure 2 is inconsistent to the notation in text $M^{\\textrm{tgt}}$.",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the figure consistent with the content of the equation?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the figure inconsistent with the content of the equation?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "3f8556SIEn_5_f998c26e",
          "correct": "3f8556SIEn_5_0551acc8",
          "incorrect": [
            "3f8556SIEn_4_interline-equation_equation11",
            "3f8556SIEn_4_interline-equation_equation15.5",
            "3f8556SIEn_4_interline-equation_equation12"
          ],
          "letters": ["A", "C", "D", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"notation\",\"claim\":{\"source\":\"figure_2\",\"statement\":\"M_tgt\"},\"evidence\":{\"source\":\"equation_3\",\"statement\":\"M^tgt\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"have corresponding term\"},\"evidence\":{\"source\":\"equation_3\",\"statement\":\"no corresponding term\"}}",
            "{\"letter\":\"B\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"count towards threshold\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"does not count\"}}",
            "{\"letter\":\"D\",\"attribute\":\"context\",\"claim\":{\"source\":\"expectation\",\"statement\":\"same context\"},\"evidence\":{\"source\":\"figure_2\",\"statement\":\"Target Branch\"}}"
          ],
          "letters": ["A", "C", "B", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"notation\",\"target\":\"figure_2\",\"other_involved\":\"equation_3\",\"action\":\"modify\",\"edit_statement\":\"M_tgt notation\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"'rapid violin music' label\",\"target\":\"figure_2\",\"other_involved\":\"equation_3\",\"action\":\"add\",\"edit_statement\":\"variable\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"'soft guitar music' label\",\"target\":\"figure_2\",\"other_involved\":\"threshold\",\"action\":\"add\",\"edit_statement\":\"threshold\",\"reason\":\"missing\"}",
            "{\"letter\":\"D\",\"attribute\":\"branch name\",\"target\":\"figure_2\",\"other_involved\":\"equation_3\",\"action\":\"modify\",\"edit_statement\":\"m_tgt\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["A", "C", "B", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 2", "(3)"]
    }
  ],
  "3Q7y9No9VF": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "3Q7y9No9VF_8_04e66270",
          "bbox": {
            "x": 0.14368932038834953,
            "y": 0.17533835504288064,
            "width": 0.7165048543689321,
            "height": 0.4481203007518797
          }
        }
      ],
      "review_text": "Table 1: The paper claims to use three real-world datasets, but only two are included in the experiments.",
      "category": "table-caption",
      "description": "The caption mentions three real-world dataset, but the table only shows two",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the table consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the table inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"number of datasets\",\"claim\":{\"source\":\"caption\",\"statement\":\"three datasets\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"two datasets\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"number of models\",\"claim\":{\"source\":\"caption\",\"statement\":\"14 baseline models\"},\"evidence\":{\"source\":\"Table 1\",\"statement\":\"13 models\"}}",
            "{\"letter\":\"D\",\"attribute\":\"promotion description\",\"claim\":{\"source\":\"expectation\",\"statement\":\"improvement margin\"},\"evidence\":{\"source\":\"values\",\"statement\":\"absolute performance\"}}",
            "{\"letter\":\"B\",\"attribute\":\"formatting consistency\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent formatting\"},\"evidence\":{\"source\":\"table\",\"statement\":\"inconsistent formatting\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"datasets\",\"target\":\"caption\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"count\",\"reason\":\"mismatch\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"baseline models\",\"target\":\"caption\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"count\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"D\",\"attribute\":\"promotion description\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"B\",\"attribute\":\"style conventions\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"apply consistently\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "3MDmM0rMPQ": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 30,
          "image_id": "3MDmM0rMPQ_30_133d7fec",
          "bbox": {
            "x": 0.170873786407767,
            "y": 0.10165414451656486,
            "width": 0.6601941747572816,
            "height": 0.34285714285714286
          }
        }
      ],
      "review_text": "Figure 6: The error bars fall below 0, which contradicts the statement that the value for 'Unique successful jailbreaks' should be greater than 0.",
      "category": "figure-only",
      "description": "The Figure is supposed to show success rate, but the error bars go below 0, which should not be possible.",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"error bars\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be non-negative\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"extend below 0\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"legend\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be clear\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"not clear\"}}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis range\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should fit data\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"too narrow\"}}",
            "{\"letter\":\"A\",\"attribute\":\"x-axis labels\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be readable\"},\"evidence\":{\"source\":\"figure_6\",\"statement\":\"overlap\"}}"
          ],
          "letters": ["C", "D", "B", "A"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"error bars\",\"target\":\"figure_6\",\"other_involved\":\"y-axis\",\"action\":\"modify\",\"edit_statement\":\"adjust position\",\"reason\":\"negative value\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"legend\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"add legend\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"y-axis range\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"expand range\",\"reason\":\"narrow\"}",
            "{\"letter\":\"A\",\"attribute\":\"x-axis labels\",\"target\":\"figure_6\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"adjust labels rotation\",\"reason\":\"overlap\"}"
          ],
          "letters": ["C", "D", "B", "A"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "3JfvvuPXsH": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "3JfvvuPXsH_9_e9d582c1",
          "bbox": {
            "x": 0.17281553398058255,
            "y": 0.09563906103148496,
            "width": 0.6582524271844661,
            "height": 0.28721804511278193
          }
        }
      ],
      "review_text": "Table 2: The proposed method is underperformed by significantly faster alternatives in many metrics.",
      "category": "table-only",
      "description": "The PointRecon proposed method is said to be an 'online' method, but a latency of 618 ms per frame can't be considered online",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the table that is consistent with a different part of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the table that is inconsistent with a different part of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"latency\",\"claim\":{\"source\":\"expectation\",\"statement\":\"low latency\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"618 ms\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"classification\",\"claim\":{\"source\":\"text\",\"statement\":\"online method\"},\"evidence\":{\"source\":\"text\",\"statement\":\"non-volumetric\"}}",
            "{\"letter\":\"C\",\"attribute\":\"F-Score\",\"claim\":{\"source\":\"expectation\",\"statement\":\"higher than offline\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"lower than offline\"}}",
            "{\"letter\":\"D\",\"attribute\":\"resolution\",\"claim\":{\"source\":\"text\",\"statement\":\"2cm\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"4cm\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"latency\",\"target\":\"PointRecon\",\"other_involved\":\"'Online' methods\",\"action\":\"modify\",\"edit_statement\":\"align classification\",\"reason\":\"higher latency\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"characteristic\",\"target\":\"PointRecon's 'Non-Volumetric' characteristic\",\"other_involved\":\"classification 'Online'\",\"action\":\"modify\",\"edit_statement\":\"align classification\",\"reason\":\"contradiction\"}",
            "{\"letter\":\"C\",\"attribute\":\"F-Score\",\"target\":\"PointRecon\",\"other_involved\":\"'Online' methods\",\"action\":\"modify\",\"edit_statement\":\"align classification\",\"reason\":\"lower score\"}",
            "{\"letter\":\"D\",\"attribute\":\"resolution\",\"target\":\"description\",\"other_involved\":\"table\",\"action\":\"add\",\"edit_statement\":\"latency information\",\"reason\":\"missing data\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "324fOKW1wO": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "324fOKW1wO_7_62e949a2",
          "bbox": {
            "x": 0.16699029126213594,
            "y": 0.09789474602032426,
            "width": 0.6699029126213593,
            "height": 0.29774436090225564
          }
        },
        {
          "type": "text",
          "page": 1,
          "content": "SimDT exhibits 41% reduction in collision rate and 18% improvement in reaching the destination compared with the baseline method.",
          "line": 24
        }
      ],
      "review_text": "Table 1: The performance improvements claimed in the abstract and main text (45.2% for Off-Road Rate and 41% for Collision Rate) are not reflected in the table, where the improvements are much more modest (e.g., ~0.2% for Off-Road Rate and about 2% for Collision Rate).",
      "category": "table-text",
      "description": "The text in the abstract claims improvements over the baseline that can't be validated with Table 1",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the table consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the table inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "SimDT exhibits 41% reduction in collision rate and 18% improvement in reaching the destination compared with the baseline method.",
          "correct": "324fOKW1wO_7_62e949a2",
          "incorrect": [
            "324fOKW1wO_7_image_figure3",
            "324fOKW1wO_8_image_figure5",
            "324fOKW1wO_8_image_figure4"
          ],
          "letters": ["D", "A", "C", "B"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"Route Progress Ratio\",\"claim\":{\"source\":\"text\",\"statement\":\"18% improvement\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"invalidates claim\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"collision rate\",\"claim\":{\"source\":\"text\",\"statement\":\"41% reduction\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"56% reduction\"}}",
            "{\"letter\":\"A\",\"attribute\":\"data sufficiency\",\"claim\":{\"source\":\"expectation\",\"statement\":\"validate claims\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"insufficient data\"}}",
            "{\"letter\":\"D\",\"attribute\":\"Kinematic Infeasibility\",\"claim\":{\"source\":\"expectation\",\"statement\":\"consistent with improvements\"},\"evidence\":{\"source\":\"table_1\",\"statement\":\"inconsistent\"}}"
          ],
          "letters": ["B", "C", "A", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"improvement in Route Progress Ratio\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"change percentage\",\"reason\":\"inconsistent\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"reduction in collision rate\",\"target\":\"text\",\"other_involved\":\"table_1\",\"action\":\"modify\",\"edit_statement\":\"change percentage\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"data\",\"target\":\"table_1\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"data for validation\",\"reason\":\"insufficient\"}",
            "{\"letter\":\"D\",\"attribute\":\"kinematic_infeasibility\",\"target\":\"simDT\",\"other_involved\":\"improvements\",\"action\":\"modify\",\"edit_statement\":\"update value\",\"reason\":\"inconsistent\"}"
          ],
          "letters": ["B", "C", "A", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1"]
    }
  ],
  "2orBSi7pvi": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "2orBSi7pvi_8_4a3637bf",
          "bbox": {
            "x": 0.1766990291262136,
            "y": 0.2377443241893797,
            "width": 0.6621359223300971,
            "height": 0.18195488721804512
          }
        }
      ],
      "review_text": "Table 2 caption: The caption mentions MG-TSD, but this method is not shown in the table content.",
      "category": "table-caption",
      "description": "The caption of the table mentions results for MG-TSD, but the Table does not show results for that method",
      "mcq": {
        "binary_consistent": {
          "question": "Is the caption of the table consistent with the content of the table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the caption of the table inconsistent with the content of the table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"models compared\",\"claim\":{\"source\":\"caption\",\"statement\":\"MG-TSD and TimeGrad\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"TimeGrad only\"}}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"bolded values\",\"claim\":{\"source\":\"table_2\",\"statement\":\"smaller is better\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"larger bolded\"}}",
            "{\"letter\":\"A\",\"attribute\":\"metrics\",\"claim\":{\"source\":\"caption\",\"statement\":\"CRPSsum and NRMSEsum\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"one metric\"}}",
            "{\"letter\":\"C\",\"attribute\":\"degradation processes\",\"claim\":{\"source\":\"caption\",\"statement\":\"-DDPM and -STDM\"},\"evidence\":{\"source\":\"table_2\",\"statement\":\"-DDPM\"}}"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"results\",\"target\":\"table_2\",\"other_involved\":\"caption_table_2\",\"action\":\"add\",\"edit_statement\":\"MG-TSD results\",\"reason\":\"not present\"}",
          "incorrect": [
            "{\"letter\":\"B\",\"attribute\":\"values\",\"target\":\"table_2\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"bolding of values\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"metrics\",\"target\":\"table_2\",\"other_involved\":\"caption_table_2\",\"action\":\"add\",\"edit_statement\":\"NRMSEsum values\",\"reason\":\"missing\"}",
            "{\"letter\":\"C\",\"attribute\":\"degradation process\",\"target\":\"table_2\",\"other_involved\":\"caption_table_2\",\"action\":\"add\",\"edit_statement\":\"STDM data\",\"reason\":\"missing\"}"
          ],
          "letters": ["D", "B", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 2"]
    }
  ],
  "1eI236MqEA": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "1eI236MqEA_5_28ec8f7c",
          "bbox": {
            "x": 0.17281553398058255,
            "y": 0.09413530163298874,
            "width": 0.6621359223300971,
            "height": 0.14887218045112782
          }
        }
      ],
      "review_text": "Fig. 3a and Fig. 3b: The labels 'm1-v1' and 'm2-v2' do not match, indicating a contradiction in the presented data.",
      "category": "figure-only",
      "description": "The area for M_1 and M_2 in Figure 3 (b) does not match the areas for V1 and V2 and it seems the color is inverted",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"area and color\",\"claim\":{\"source\":\"expectation\",\"statement\":\"M2 matches V2\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"M2 doesn't match V2 and color mismatch\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"color\",\"claim\":{\"source\":\"expectation\",\"statement\":\"M1 M2 red green\"},\"evidence\":{\"source\":\"Figure 3(b)\",\"statement\":\"M1 M2 blue\"}}",
            "{\"letter\":\"A\",\"attribute\":\"area\",\"claim\":{\"source\":\"caption\",\"statement\":\"M1 smaller than V1\"},\"evidence\":{\"source\":\"Figure 3\",\"statement\":\"M1 not smaller\"}}",
            "{\"letter\":\"B\",\"attribute\":\"color\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be transparent\"},\"evidence\":{\"source\":\"Figure 3(c)\",\"statement\":\"gray\"}}"
          ],
          "letters": ["C", "D", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"M2 region area\",\"target\":\"figure_3b\",\"other_involved\":\"V2 Mask green region in figure_3c\",\"action\":\"modify\",\"edit_statement\":\"match area and color\",\"reason\":\"does not match\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"M1 and M2 regions fill color\",\"target\":\"figure_3b\",\"other_involved\":\"V1 and V2 concept masks in figure_3c\",\"action\":\"modify\",\"edit_statement\":\"match color\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"A\",\"attribute\":\"M1 region area\",\"target\":\"figure_3b\",\"other_involved\":\"V1 Mask red region in figure_3c\",\"action\":\"modify\",\"edit_statement\":\"match area\",\"reason\":\"smaller\"}",
            "{\"letter\":\"B\",\"attribute\":\"gray areas\",\"target\":\"figure_3c\",\"other_involved\":\"caption\",\"action\":\"replace\",\"edit_statement\":\"transparent\",\"reason\":\"incorrectly colored\"}"
          ],
          "letters": ["C", "D", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 3"]
    }
  ],
  "0zZEbHLTwf": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 9,
          "image_id": "0zZEbHLTwf_9_e25fa749",
          "bbox": {
            "x": 0.11456310679611652,
            "y": 0.40390973772321426,
            "width": 0.7786407766990291,
            "height": 0.24661654135338346
          }
        }
      ],
      "review_text": "Fig. 5: The lines start from different initial points, suggesting the authors used different initializations for comparison, which is unfair.",
      "category": "figure-only",
      "description": "The figure should claim that DeepFDM trains to a low error in less epochs, but the initialization is different, making the comparison unfair",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"initialization\",\"claim\":{\"source\":\"expectation\",\"statement\":\"uniform initialization\"},\"evidence\":{\"source\":\"figure_5\",\"statement\":\"DeepFDM has lower initial MSE\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"parameters\",\"claim\":{\"source\":\"expectation\",\"statement\":\"fewer parameters is advantageous\"},\"evidence\":{\"source\":\"expectation\",\"statement\":\"not necessarily advantageous\"}}",
            "{\"letter\":\"D\",\"attribute\":\"stability\",\"claim\":{\"source\":\"expectation\",\"statement\":\"smooth curve is stable\"},\"evidence\":{\"source\":\"figure_5\",\"statement\":\"DeepFDM and FNO have smooth curves\"}}",
            "{\"letter\":\"B\",\"attribute\":\"convergence\",\"claim\":{\"source\":\"caption\",\"statement\":\"DeepFDM is superior\"},\"evidence\":{\"source\":\"plot\",\"statement\":\"ResNet converged fastest\"}}"
          ],
          "letters": ["C", "A", "D", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"C\",\"attribute\":\"initial MSE\",\"target\":\"figure_5\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"align values\",\"reason\":\"non-uniform initialization\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"parameters\",\"target\":\"text\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"explain advantage\",\"reason\":\"not specified\"}",
            "{\"letter\":\"D\",\"attribute\":\"MSE fluctuations\",\"target\":\"figure_5\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"add explanation\",\"reason\":\"instability\"}",
            "{\"letter\":\"B\",\"attribute\":\"convergence speed\",\"target\":\"text\",\"other_involved\":\"figure_5\",\"action\":\"modify\",\"edit_statement\":\"align statement\",\"reason\":\"contradicts figure\"}"
          ],
          "letters": ["C", "A", "D", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 5"]
    }
  ],
  "0jmFRA64Vw": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 7,
          "image_id": "0jmFRA64Vw_7_bc7c153c",
          "bbox": {
            "x": 0.4990291262135923,
            "y": 0.09864657982847744,
            "width": 0.33592233009708744,
            "height": 0.33082706766917297
          }
        }
      ],
      "review_text": "Figure 6: The results in the table conflict with those in the subfigure within the same figure.",
      "category": "figure-only",
      "description": "The results in the table part of the Figure do not match the lower row of the sub-plots",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["B", "A"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"Qr values\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be lower\"},\"evidence\":{\"source\":\"table and sub-plots\",\"statement\":\"higher\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"training loss\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should decrease\"},\"evidence\":{\"source\":\"training loss curves\",\"statement\":\"increase\"}}",
            "{\"letter\":\"A\",\"attribute\":\"legend color-coding\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"sub-plots\",\"statement\":\"inconsistent\"}}",
            "{\"letter\":\"C\",\"attribute\":\"x-axis maximum\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"x-axis\",\"statement\":\"inconsistent\"}}"
          ],
          "letters": ["B", "D", "A", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"Qr values\",\"target\":\"table\",\"other_involved\":\"figure_6\",\"action\":\"modify\",\"edit_statement\":\"align Qr values\",\"reason\":\"substantially higher\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"training loss curves\",\"target\":\"figure_6\",\"other_involved\":\"bits r = 8,bits r = 16\",\"action\":\"modify\",\"edit_statement\":\"align expected decrease\",\"reason\":\"contradicts expected\"}",
            "{\"letter\":\"A\",\"attribute\":\"color-coding\",\"target\":\"figure_6\",\"other_involved\":\"legend\",\"action\":\"modify\",\"edit_statement\":\"align alpha parameter\",\"reason\":\"inconsistent\"}",
            "{\"letter\":\"C\",\"attribute\":\"Communication bits\",\"target\":\"x-axis\",\"other_involved\":\"Training Loss plots,Testing Accuracy plots\",\"action\":\"modify\",\"edit_statement\":\"align maximum values\",\"reason\":\"different\"}"
          ],
          "letters": ["B", "D", "A", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 6"]
    }
  ],
  "0Xc6o1HKXD": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 5,
          "image_id": "0Xc6o1HKXD_5_42a93d89",
          "bbox": {
            "x": 0.31844660194174756,
            "y": 0.5467668805803572,
            "width": 0.512621359223301,
            "height": 0.030075187969924814
          }
        },
        {
          "type": "text",
          "page": 3,
          "content": "For downstream tasks involving K classes, each class is incorporated into a hard prompt.",
          "line": 161
        }
      ],
      "review_text": "Multiple definitions of $K$: In Line 161, $K$ is defined as the number of classes, while in Line 244 and Equation (6), $K$ is the number of selected regions.",
      "category": "equation-text",
      "description": "In the text, K is specified as the number of classes, but in the Equation (6), it is the number of Top-K regions",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the equation consistent with the text?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the equation inconsistent with the text?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["B", "A"]
        },
        "part_pair": {
          "question": "For downstream tasks involving K classes, each class is incorporated into a hard prompt.",
          "correct": "0Xc6o1HKXD_5_42a93d89",
          "incorrect": [
            "0Xc6o1HKXD_4_interline-equation_equation10",
            "0Xc6o1HKXD_4_interline-equation_equation38",
            "0Xc6o1HKXD_4_interline-equation_equation18.5"
          ],
          "letters": ["B", "D", "C", "A"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"K\",\"claim\":{\"source\":\"text\",\"statement\":\"number of classes\"},\"evidence\":{\"source\":\"(6)\",\"statement\":\"number of Top-K regions\"}}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"K\",\"claim\":{\"source\":\"text\",\"statement\":\"number of Top-K regions\"},\"evidence\":{\"source\":\"(6)\",\"statement\":\"total number of samples\"}}",
            "{\"letter\":\"A\",\"attribute\":\"K\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be defined\"},\"evidence\":{\"source\":\"(6)\",\"statement\":\"not defined\"}}",
            "{\"letter\":\"B\",\"attribute\":\"K\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be constant\"},\"evidence\":{\"source\":\"(6)\",\"statement\":\"is variable\"}}"
          ],
          "letters": ["D", "C", "A", "B"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"D\",\"attribute\":\"variable K\",\"target\":\"equation_6\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"meaning of K\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"C\",\"attribute\":\"variable K\",\"target\":\"equation_6\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"meaning of K\",\"reason\":\"different\"}",
            "{\"letter\":\"A\",\"attribute\":\"variable K\",\"target\":\"equation_6\",\"other_involved\":\"text\",\"action\":\"add\",\"edit_statement\":\"definition and use\",\"reason\":\"missing\"}",
            "{\"letter\":\"B\",\"attribute\":\"variable K\",\"target\":\"equation_6\",\"other_involved\":\"text\",\"action\":\"modify\",\"edit_statement\":\"type of K\",\"reason\":\"discrepancy\"}"
          ],
          "letters": ["D", "C", "A", "B"]
        }
      },
      "severity": 0,
      "visual_elements": ["(6)"]
    }
  ],
  "0JwxMqKGxa": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 2,
          "image_id": "0JwxMqKGxa_2_ba85f3bf",
          "bbox": {
            "x": 0.14368932038834953,
            "y": 0.5700751512570489,
            "width": 0.7184466019417476,
            "height": 0.2406015037593985
          }
        }
      ],
      "review_text": "Figure 1: The subfigure 'Signal processing method' mentions Machine Learning and Neural Networks separately, but Neural network training is also machine learning.",
      "category": "figure-only",
      "description": "The second sub-plot shows Neural Network training and Machine Learning to be separate, but Neural Networks are a sub-group of Machine Learning",
      "mcq": {
        "binary_consistent": {
          "question": "Is there a part of the figure that is consistent with a different part of the figure?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is there a part of the figure that is inconsistent with a different part of the figure?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {},
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"categories\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be exclusive\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"are not exclusive\"}}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"count\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be 29\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"is 30\"}}",
            "{\"letter\":\"C\",\"attribute\":\"breakdown\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should include others\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"only two categories\"}}",
            "{\"letter\":\"D\",\"attribute\":\"label\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should not imply AI only\"},\"evidence\":{\"source\":\"Figure 1\",\"statement\":\"implies AI only\"}}"
          ],
          "letters": ["B", "A", "C", "D"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"B\",\"attribute\":\"categorization\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"overlap categories\",\"reason\":\"hierarchy wrong\"}",
          "incorrect": [
            "{\"letter\":\"A\",\"attribute\":\"count\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"match bar value\",\"reason\":\"mismatch\"}",
            "{\"letter\":\"C\",\"attribute\":\"breakdown\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"add\",\"edit_statement\":\"include other techniques\",\"reason\":\"oversimplification\"}",
            "{\"letter\":\"D\",\"attribute\":\"label meaning\",\"target\":\"figure_1\",\"other_involved\":null,\"action\":\"modify\",\"edit_statement\":\"clarify data usage\",\"reason\":\"misleading\"}"
          ],
          "letters": ["B", "A", "C", "D"]
        }
      },
      "severity": 0,
      "visual_elements": ["Figure 1"]
    }
  ],
  "09TI1yUo9K": [
    {
      "inconsistency_parts": [
        {
          "type": "image",
          "page": 8,
          "image_id": "09TI1yUo9K_8_a1471e2b",
          "bbox": {
            "x": 0.16893203883495148,
            "y": 0.09263154223449248,
            "width": 0.6621359223300971,
            "height": 0.1879699248120301
          }
        },
        {
          "type": "image",
          "page": 8,
          "image_id": "09TI1yUo9K_8_67b084bc",
          "bbox": {
            "x": 0.16893203883495148,
            "y": 0.28548868509163533,
            "width": 0.6621359223300971,
            "height": 0.4105263157894737
          }
        }
      ],
      "review_text": "Table 1 and Table 2: The comparison methods differ between the two tables, and some results (CPMF, IMRNet, R3D-AD on the ICD dataset) are missing in Table 2, indicating a discrepancy in the presentation of experimental results.",
      "category": "table-table",
      "description": "Table 1 and Table 2 should compare the proposed method with baselines for two different datasets, but the baselines for comparison are not kept consistent",
      "mcq": {
        "binary_consistent": {
          "question": "Is the content of the first table consistent with the content of the second table?",
          "correct": "Yes",
          "incorrect": ["No"],
          "letters": ["A", "B"]
        },
        "binary_inconsistent": {
          "question": "Is the content of the first table inconsistent with the content of the second table?",
          "correct": "No",
          "incorrect": ["Yes"],
          "letters": ["A", "B"]
        },
        "part_pair": {
          "question": "09TI1yUo9K_8_a1471e2b",
          "correct": "09TI1yUo9K_8_67b084bc",
          "incorrect": [
            "09TI1yUo9K_7_table_table1",
            "09TI1yUo9K_10_table_3ddataset",
            "09TI1yUo9K_12_table_table5"
          ],
          "letters": ["D", "B", "A", "C"]
        },
        "default": {
          "question": "What is the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"baseline methods\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"differ\"}}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"performance metrics\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"differ\"}}",
            "{\"letter\":\"B\",\"attribute\":\"highlighting scheme\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be uniform\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"not uniform\"}}",
            "{\"letter\":\"C\",\"attribute\":\"number of categories\",\"claim\":{\"source\":\"expectation\",\"statement\":\"should be consistent\"},\"evidence\":{\"source\":\"Table 1 and Table 2\",\"statement\":\"differ\"}}"
          ],
          "letters": ["A", "D", "B", "C"]
        },
        "edit": {
          "question": "What action needs to be taken to resolve the inconsistency in these parts of a scientific paper?",
          "correct": "{\"letter\":\"A\",\"attribute\":\"baseline methods\",\"target\":\"table_1\",\"other_involved\":\"table_2\",\"action\":\"add\",\"edit_statement\":\"add missing methods\",\"reason\":\"different\"}",
          "incorrect": [
            "{\"letter\":\"D\",\"attribute\":\"performance metrics\",\"target\":\"table_1\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"align metrics\",\"reason\":\"difficult comparison\"}",
            "{\"letter\":\"B\",\"attribute\":\"highlighting scheme\",\"target\":\"table_1\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"update colors\",\"reason\":\"not uniform\"}",
            "{\"letter\":\"C\",\"attribute\":\"number of categories\",\"target\":\"table_1\",\"other_involved\":\"table_2\",\"action\":\"modify\",\"edit_statement\":\"align categories count\",\"reason\":\"imbalanced comparison\"}"
          ],
          "letters": ["A", "D", "B", "C"]
        }
      },
      "severity": 0,
      "visual_elements": ["Table 1", "Table 2"]
    }
  ]
}
