{
    "database_name": "ai_research",
    "description": "This database contains information about AI research papers. Each PDF file is represented or parsed via different views, e.g., pages, sections, figures, tables, and references. We also extract the concrete content inside each concrete element via OCR.",
    "database_schema": [
        {
            "table": {
                "table_name": "metadata",
                "description": "This table stores metadata about each paper, including the number of pages, paper path and paper id."
            },
            "columns": [
                {
                    "column_name": "paper_id",
                    "column_type": "UUID",
                    "description": "A unique identifier for this paper."
                },
                {
                    "column_name": "title",
                    "column_type": "VARCHAR",
                    "description": "The title of this paper.",
                    "encodable": "text"
                },
                {
                    "column_name": "abstract",
                    "column_type": "VARCHAR",
                    "description": "The abstract of this paper.",
                    "encodable": "text"
                },
                {
                    "column_name": "num_pages",
                    "column_type": "INTEGER",
                    "description": "The number of pages in this paper."
                },
                {
                    "column_name": "conference_full",
                    "column_type": "VARCHAR",
                    "description": "The full name of the conference where this paper was published. `uncategorized` if the paper has not been published yet."
                },
                {
                    "column_name": "conference_abbreviation",
                    "column_type": "VARCHAR",
                    "description": "The abbreviation of the conference where this paper was published. `uncategorized` if the paper has not been published yet."
                },
                {
                    "column_name": "pub_year",
                    "column_type": "INTEGER",
                    "description": "The year when this paper was published."
                },
                {
                    "column_name": "volume",
                    "column_type": "VARCHAR",
                    "description": "The volume of the conference where this paper was published. e.g. findings."
                },
                {
                    "column_name": "download_url",
                    "column_type": "VARCHAR",
                    "description": "The url from which the paper can be downloaded."
                },
                {
                    "column_name": "bibtex",
                    "column_type": "VARCHAR",
                    "description": "The bibtex of this paper.",
                    "encodable": "text"
                },
                {
                    "column_name": "authors",
                    "column_type": "VARCHAR[]",
                    "description": "The full names of authors of this paper."
                },
                {
                    "column_name": "pdf_path",
                    "column_type": "VARCHAR",
                    "description": "The path to the PDF file of this paper."
                },
                {
                    "column_name": "tldr",
                    "column_type": "VARCHAR",
                    "description": "A brief summary of the paper's main idea or findings generated by LLM based on title and abstract.",
                    "encodable": "text"
                },
                {
                    "column_name": "tags",
                    "column_type": "VARCHAR[]",
                    "description": "Keywords representing the paper's topics, methods, or applications generated by LLM based on title and abstract."
                }
            ],
            "primary_keys": [
                "paper_id"
            ]
        },
        {
            "table": {
                "table_name": "pages",
                "description": "This table stores information of the pages in the papers, including their content, size and their order within the paper. The pagesare extracted using PyMuPDF."
            },
            "columns": [
                {
                    "column_name": "page_id",
                    "column_type": "UUID",
                    "description": "A unique identifier for each page."
                },
                {
                    "column_name": "page_number",
                    "column_type": "INTEGER",
                    "description": "The page number in the current paper, starting from 1."
                },
                {
                    "column_name": "page_width",
                    "column_type": "INTEGER",
                    "description": "The pixel width of the current page."
                },
                {
                    "column_name": "page_height",
                    "column_type": "INTEGER",
                    "description": "The pixel height of the current page."
                },
                {
                    "column_name": "page_content",
                    "column_type": "VARCHAR",
                    "description": "The content of the page.",
                    "encodable": "text"
                },
                {
                    "column_name": "page_summary",
                    "column_type": "VARCHAR",
                    "description": "A brief summary of the page content, generated by LLM, focusing on key information and describing the page content.",
                    "encodable": "text"
                },
                {
                    "column_name": "ref_paper_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the `paper_id` in the `metadata` table."
                }
            ],
            "primary_keys": [
                "page_id"
            ],
            "foreign_keys": [
                [
                    "ref_paper_id",
                    "metadata",
                    "paper_id"
                ]
            ]
        },
        {
            "table": {
                "table_name": "images",
                "description": "This table stores the information about images in each paper. The images are extracted using MinerU."
            },
            "columns": [
                {
                    "column_name": "image_id",
                    "column_type": "UUID",
                    "description": "The unique identifier of the image."
                },
                {
                    "column_name": "image_caption",
                    "column_type": "VARCHAR",
                    "description": "The caption of this image, \"\" if it doesn't have one.",
                    "encodable": "text"
                },
                {
                    "column_name": "image_summary",
                    "column_type": "VARCHAR",
                    "description": "A brief summary of the image, generated by LLM, focusing on key information and describing the image.",
                    "encodable": "text"
                },
                {
                    "column_name": "bounding_box",
                    "column_type": "INTEGER[4]",
                    "description": "The bounding box of the figure in the format [x0, y0, w, h], where (x0, y0) represents the coordinates of the top-left corner and (w, h) represents the width and height which are used to determine the shape of the rectangle.",
                    "encodable": "image"
                },
                {
                    "column_name": "ordinal",
                    "column_type": "INTEGER",
                    "description": "Each figure is labeled with one distinct integer number in the current page, which starts from 0."
                },
                {
                    "column_name": "ref_paper_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the `paper_id` in the `metadata` table."
                },
                {
                    "column_name": "ref_page_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the `page_id` in the `pages` table."
                }
            ],
            "primary_keys": [
                "image_id"
            ],
            "foreign_keys": [
                [
                    "ref_paper_id",
                    "metadata",
                    "paper_id"
                ],
                [
                    "ref_page_id",
                    "pages",
                    "page_id"
                ]
            ]
        },
        {
            "table": {
                "table_name": "chunks",
                "description": "This table contains the information of each chunk of text (chunk size = 512 tokens with no overlapping) in each page of the paper. A chunk is a sub-text that is extracted from the main text using langchain, such as a sentence or a paragraph."
            },
            "columns": [
                {
                    "column_name": "chunk_id",
                    "column_type": "UUID",
                    "description": "A unique identifier for each chunk of text."
                },
                {
                    "column_name": "text_content",
                    "column_type": "VARCHAR",
                    "description": "The text content of the current chunk.",
                    "encodable": "text"
                },
                {
                    "column_name": "ordinal",
                    "column_type": "INTEGER",
                    "description": "Each chunk is labeled with one distinct integer number in the current page, which starts from 0."
                },
                {
                    "column_name": "ref_paper_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the `paper_id` in the `metadata` table."
                },
                {
                    "column_name": "ref_page_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the `page_id` in the `pages` table."
                }
            ],
            "primary_keys": [
                "chunk_id"
            ],
            "foreign_keys": [
                [
                    "ref_paper_id",
                    "metadata",
                    "paper_id"
                ],
                [
                    "ref_page_id",
                    "pages",
                    "page_id"
                ]
            ]
        },
        {
            "table": {
                "table_name": "tables",
                "description": "This table stores information about tables extracted from pages using MinerU, including content (in html format), bounding box and summary of each table generated by LLM."
            },
            "columns": [
                {
                    "column_name": "table_id",
                    "column_type": "UUID",
                    "description": "A unique identifier for each table."
                },
                {
                    "column_name": "table_caption",
                    "column_type": "VARCHAR",
                    "description": "Caption of the table, showing key information of the table.",
                    "encodable": "text"
                },
                {
                    "column_name": "table_content",
                    "column_type": "VARCHAR",
                    "description": "The content of the table in html format.",
                    "encodable": "text"
                },
                {
                    "column_name": "table_summary",
                    "column_type": "VARCHAR",
                    "description": "A brief summary of the table content generated by LLM, focusing on key information and describing the table content.",
                    "encodable": "text"
                },
                {
                    "column_name": "bounding_box",
                    "column_type": "INTEGER[4]",
                    "description": "The bounding box of the table in the format [x0, y0, w, h], where (x0, y0) represents the coordinates of the top-left corner and (w, h) represents the width and height.",
                    "encodable": "image"
                },
                {
                    "column_name": "ordinal",
                    "column_type": "INTEGER",
                    "description": "Each table is labeled with one distinct integer number in the current page, which starts from 0."
                },
                {
                    "column_name": "ref_paper_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the `paper_id` in the `metadata` table."
                },
                {
                    "column_name": "ref_page_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the `page_id` in the `pages` table where this table is located."
                }
            ],
            "primary_keys": [
                "table_id"
            ],
            "foreign_keys": [
                [
                    "ref_paper_id",
                    "metadata",
                    "paper_id"
                ],
                [
                    "ref_page_id",
                    "pages",
                    "page_id"
                ]
            ]
        },
        {
            "table": {
                "table_name": "sections",
                "description": "This table contains the text content of each section in the paper, e.g. `Introduction`, `Experiment`, `Conclusion`, `Reference`. The sections are extracted using PyMuPDF or MinerU."
            },
            "columns": [
                {
                    "column_name": "section_id",
                    "column_type": "UUID",
                    "description": "A unique identifier for each section of text."
                },
                {
                    "column_name": "section_title",
                    "column_type": "VARCHAR",
                    "description": "The title of the current section.",
                    "encodable": "text"
                },
                {
                    "column_name": "section_content",
                    "column_type": "VARCHAR",
                    "description": "The text content of the current section.",
                    "encodable": "text"
                },
                {
                    "column_name": "section_summary",
                    "column_type": "VARCHAR",
                    "description": "A brief summary of the section content generated by LLM, focusing on key information and describing the section content.",
                    "encodable": "text"
                },
                {
                    "column_name": "ordinal",
                    "column_type": "INTEGER",
                    "description": "Each section is labeled with one distinct integer number in the paper, which starts from 0."
                },
                {
                    "column_name": "page_numbers",
                    "column_type": "INTEGER[]",
                    "description": "A list of page numbers where current section is located in the relevant paper."
                },
                {
                    "column_name": "ref_paper_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the `paper_id` in the `metadata` table."
                }
            ],
            "primary_keys": [
                "section_id"
            ],
            "foreign_keys": [
                [
                    "ref_paper_id",
                    "metadata",
                    "paper_id"
                ]
            ]
        },
        {
            "table": {
                "table_name": "equations",
                "description": "This table stores information about equations extracted from pages using MinerU."
            },
            "columns": [
                {
                    "column_name": "equation_id",
                    "column_type": "UUID",
                    "description": "A unique identifier for each equation."
                },
                {
                    "column_name": "equation_content",
                    "column_type": "VARCHAR",
                    "description": "Content of the equation in latex format.",
                    "encodable": "text"
                },
                {
                    "column_name": "ordinal",
                    "column_type": "INTEGER",
                    "description": "Each equation is labeled with one distinct integer number in the current page, which starts from 0."
                },
                {
                    "column_name": "ref_paper_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the paper ID in the `metadata` table."
                },
                {
                    "column_name": "ref_page_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the page ID in the `pages` table where this equation is located."
                }
            ],
            "primary_keys": [
                "equation_id"
            ],
            "foreign_keys": [
                [
                    "ref_paper_id",
                    "metadata",
                    "paper_id"
                ],
                [
                    "ref_page_id",
                    "pages",
                    "page_id"
                ]
            ]
        },
        {
            "table": {
                "table_name": "reference",
                "description": "This table stores information about references extracted using MinerU."
            },
            "columns": [
                {
                    "column_name": "reference_id",
                    "column_type": "UUID",
                    "description": "A unique identifier for each reference."
                },
                {
                    "column_name": "reference_content",
                    "column_type": "VARCHAR",
                    "description": "Content of the reference.",
                    "encodable": "text"
                },
                {
                    "column_name": "ordinal",
                    "column_type": "INTEGER",
                    "description": "Each reference is labeled with one distinct integer number according to its order in the paper, which starts from 0."
                },
                {
                    "column_name": "ref_paper_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the `paper_id` in the `metadata` table."
                },
                {
                    "column_name": "ref_page_id",
                    "column_type": "UUID",
                    "description": "A foreign key linking to the `page_id` in the `pages` table where this reference is located."
                }
            ],
            "primary_keys": [
                "reference_id"
            ],
            "foreign_keys": [
                [
                    "ref_paper_id",
                    "metadata",
                    "paper_id"
                ],
                [
                    "ref_page_id",
                    "pages",
                    "page_id"
                ]
            ]
        }
    ]
}