{
      "@context":"https://schema.org/",
      "@type":"Dataset",
      "name":"DeepForm v2",
      "description":"DeepForm v2 aims to extract information from TV and cable political advertising disclosure forms using deep learning and provide a challenging journalism-relevant dataset for NLP/ML researchers. This public data is valuable to journalists but locked in PDFs. Through this benchmark, we hope to accelerate collaboration on the concrete task of making this data accessible and longer-term solutions for general information extraction from visually-structured documents in fields like medicine, climate science, social science, and beyond. Version 2.0 was based on the data available at url: https://wandb.ai/deepform/political-ad-extraction/benchmark .",
      "url":"https://wandb.ai/deepform/political-ad-extraction/benchmark",
      "identifier": ["https://wandb.ai/deepform/political-ad-extraction/benchmark", "https://github.com/project-deepform/deepform"],
      "keywords":[
         "Document Understanding",
         "Key Information Extraction"
      ],
      "license" : {
        "@type": "CreativeWork",
        "name": "MIT License",
        "url": "https://github.com/project-deepform/deepform/blob/master/LICENSE"
      },
      "isAccessibleForFree" : true,
      "hasPart" : [
        {
          "@type": "Dataset",
          "name": "Train set"
        },
        {
          "@type": "Dataset",
          "name": "Dev set"
        },
        {
          "@type": "Dataset",
          "name": "Test set"
        }
      ],
      "version" : "2.0"
}
