{
  "@context": {
    "@language": "en",
    "@vocab": "https://schema.org/",
    "citeAs": "cr:citeAs",
    "column": "cr:column",
    "conformsTo": "dct:conformsTo",
    "cr": "http://mlcommons.org/croissant/",
    "rai": "http://mlcommons.org/croissant/RAI/",
    "data": {
      "@id": "cr:data",
      "@type": "@json"
    },
    "dataType": {
      "@id": "cr:dataType",
      "@type": "@vocab"
    },
    "dct": "http://purl.org/dc/terms/",
    "examples": {
      "@id": "cr:examples",
      "@type": "@json"
    },
    "extract": "cr:extract",
    "field": "cr:field",
    "fileProperty": "cr:fileProperty",
    "fileObject": "cr:fileObject",
    "fileSet": "cr:fileSet",
    "format": "cr:format",
    "includes": "cr:includes",
    "isLiveDataset": "cr:isLiveDataset",
    "jsonPath": "cr:jsonPath",
    "key": "cr:key",
    "md5": "cr:md5",
    "parentField": "cr:parentField",
    "path": "cr:path",
    "recordSet": "cr:recordSet",
    "references": "cr:references",
    "regex": "cr:regex",
    "repeated": "cr:repeated",
    "replace": "cr:replace",
    "sc": "https://schema.org/",
    "separator": "cr:separator",
    "source": "cr:source",
    "subField": "cr:subField",
    "transform": "cr:transform"
  },
  "@type": "sc:Dataset",
  "name": "MMDocRAG",
  "description": "In this paper, we present MMDocRAG, a comprehensive benchmark for multimodal document question answering and retrieval-augmented generation (RAG). MMDocRAG features over 4,000 expert-annotated QA pairs with multimodal evidence chains, as well as novel evaluation metrics for both quote selection and interleaved multimodal answer generation. Through extensive benchmarking of 58 leading LLMs and VLMs along with multiple retrieval methods, we reveal that current models struggle with effective multimodal evidence selection and interleaved image-text answer generation, especially in noisy and diverse document scenarios. Our results indicate that while proprietary models show a significant lead over open-source models, fine-tuning and the use of high-quality visual descriptions can drive substantial improvements. Despite these advances, a significant performance gap remains between current systems and the requirements of comprehensive multimodal DocVQA/DocRAG tasks. We hope that MMDocRAG will inspire future research toward more effective and interpretable multimodal reasoning in document understanding.",
  "conformsTo": "http://mlcommons.org/croissant/1.0",
  "citeAs": "N/A",
  "url": "https://github.com/MMDocRAG/MMDocRAG",
  "distribution": [
    {
      "@type": "cr:FileObject",
      "@id": "github-repository",
      "name": "github-repository",
      "description": "MMDocRAG repository on GitHub.",
      "contentUrl": "https://github.com/MMDocRAG/MMDocRAG",
      "encodingFormat": "git+https",
      "sha256": "main"
    },
    {
      "@type": "cr:FileSet",
      "@id": "jsonl-files",
      "name": "jsonl-files",
      "description": "JSONL files are hosted on the GitHub repository.",
      "containedIn": {
        "@id": "github-repository"
      },
      "encodingFormat": "application/jsonlines",
      "includes": "dataset/*.jsonl"
    }
  ],
  "recordSet": [
    {
      "@type": "cr:RecordSet",
      "@id": "jsonl",
      "name": "jsonl",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "jsonl/q_id",
          "name": "q_id",
          "description": "question id",
          "dataType": "cr:Int32",
          "source": {
            "fileSet": {
              "@id": "jsonl-files"
            },
            "extract": {
              "column": "q_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "jsonl/doc_name",
          "name": "doc_name",
          "description": "The document name for the question",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "jsonl-files"
            },
            "extract": {
              "column": "doc_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "jsonl/domain",
          "name": "domain",
          "description": "The domain or type of the document",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "jsonl-files"
            },
            "extract": {
              "column": "domain"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "jsonl/question",
          "name": "question",
          "description": "The provided question in string format",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "jsonl-files"
            },
            "extract": {
              "column": "question"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "jsonl/question_type",
          "name": "question_type",
          "description": "The type of the provided question, in string format",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "jsonl-files"
            },
            "extract": {
              "column": "question_type"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "jsonl/answer_short",
          "name": "answer_short",
          "description": "The short answer to the provided question, in string format",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "jsonl-files"
            },
            "extract": {
              "column": "answer_short"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "jsonl/answer_interleaved",
          "name": "answer_interleaved",
          "description": "The interleaved image-text answer to the provided question, in string format",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "jsonl-files"
            },
            "extract": {
              "column": "answer_interleaved"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "jsonl/gold_quotes",
          "name": "gold_quotes",
          "description": "Gold quotes in list format",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "jsonl-files"
            },
            "extract": {
              "column": "gold_quotes"
            }
          }
        }
      ]
    }
  ]
}