[
    {
        "collection_name": "text_bm25_en",
        "modality": "text",
        "embed_type": "bm25",
        "embed_model": "en",
        "description": "This collection is used to store the sparse embeddings generated by the BM25 model for all encodable text content in another relational database. The similary search is based on field `vector` with metric inner-product (IP).",
        "fields": [
            {
                "name": "id",
                "dtype": "INT64",
                "is_primary": true,
                "description": "The unique id for each encoded text vector in this collection."
            },
            {
                "name": "vector",
                "dtype": "SPARSE_FLOAT_VECTOR",
                "description": "The encoded sparse vector of the text content generated by the BM25 model. Note that, the raw text content can be uniquely identified by the triplet fields below: `table_name`, `column_name`, `primary_key`."
            },
            {
                "name": "pdf_id",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "The unique id of the PDF file stored in the relational database that the encoded text belongs to. This field can be used to quickly filter the search space."
            },
            {
                "name": "page_number",
                "dtype": "INT16",
                "description": "The page number in the PDF file that the encoded text belongs to (starting from 1). This field can be used to quickly filter the search space. Value -1 means unknown."
            },
            {
                "name": "table_name",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "Which table in the relational database is the text content from."
            },
            {
                "name": "column_name",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "Which column under `table_name` in the relational database is the text content from."
            },
            {
                "name": "primary_key",
                "dtype": "VARCHAR",
                "max_length": 512,
                "description": "The primary key value of `table_name` (or which row) in the relational database that the text content belongs to. Note that, the triplet (`table_name`, `column_name`, `primary_key`) can be used to construct a one-to-one mapping between the `text` field in the current vectorstore and the cell entry in the relational database."
            }
        ],
        "indexes": [
            {
                "field_name": "vector",
                "index_type": "SPARSE_INVERTED_INDEX",
                "metric_type": "IP",
                "params": {
                    "drop_ratio_build": 0.2
                }
            },
            {
                "field_name": "pdf_id",
                "index_type": "INVERTED"
            },
            {
                "field_name": "page_number",
                "index_type": "INVERTED"
            },
            {
                "field_name": "table_name",
                "index_type": "INVERTED"
            },
            {
                "field_name": "column_name",
                "index_type": "INVERTED"
            },
            {
                "field_name": "primary_key",
                "index_type": "INVERTED"
            }
        ]
    },
    {
        "collection_name": "text_sentence_transformers_all_minilm_l6_v2",
        "modality": "text",
        "embed_type": "sentence_transformers",
        "embed_model": "all-MiniLM-L6-v2",
        "description": "This collection is used to store the embeddings generated by the all-MiniLM-L6-v2 model for all encodable text content in another relational database. The similary search is based on field `vector` with metric COSINE.",
        "fields": [
            {
                "name": "id",
                "dtype": "INT64",
                "is_primary": true,
                "description": "The unique id for each encoded text vector in this collection."
            },
            {
                "name": "vector",
                "dtype": "FLOAT_VECTOR",
                "dim": 384,
                "description": "The encoded vector of the text content generated by the MiniLM-L6-v2 model. Note that, the raw text content can be uniquely identified by the triplet fields below: `table_name`, `column_name`, `primary_key`."
            },
            {
                "name": "pdf_id",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "The unique id of the PDF file stored in the relational database that the encoded text belongs to. This field can be used to quickly filter the search space."
            },
            {
                "name": "page_number",
                "dtype": "INT16",
                "description": "The page number in the PDF file that the encoded text belongs to (starting from 1). This field can be used to quickly filter the search space. Value -1 means unknown."
            },
            {
                "name": "table_name",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "Which table in the relational database is the text content from."
            },
            {
                "name": "column_name",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "Which column under `table_name` in the relational database is the text content from."
            },
            {
                "name": "primary_key",
                "dtype": "VARCHAR",
                "max_length": 512,
                "description": "The primary key value of `table_name` (or which row) in the relational database that the text content belongs to. Note that, the triplet (`table_name`, `column_name`, `primary_key`) can be used to construct a one-to-one mapping between the `text` field in the current vectorstore and the cell entry in the relational database."
            }
        ],
        "indexes": [
            {
                "field_name": "vector",
                "index_type": "FLAT",
                "metric_type": "COSINE"
            },
            {
                "field_name": "pdf_id",
                "index_type": "INVERTED"
            },
            {
                "field_name": "page_number",
                "index_type": "INVERTED"
            },
            {
                "field_name": "table_name",
                "index_type": "INVERTED"
            },
            {
                "field_name": "column_name",
                "index_type": "INVERTED"
            },
            {
                "field_name": "primary_key",
                "index_type": "INVERTED"
            }
        ]
    },
    {
        "collection_name": "text_sentence_transformers_bge_large_en_v1_5",
        "modality": "text",
        "embed_type": "sentence_transformers",
        "embed_model": "bge-large-en-v1.5",
        "description": "This collection is used to store the embeddings generated by the bge-large-en-v1.5 model for all encodable text content in another relational database. The similary search is based on field `vector` with metric COSINE.",
        "fields": [
            {
                "name": "id",
                "dtype": "INT64",
                "is_primary": true,
                "description": "The unique id for each encoded text vector in this collection."
            },
            {
                "name": "vector",
                "dtype": "FLOAT_VECTOR",
                "dim": 1024,
                "description": "The encoded vector of the text content generated by the bge-large-en-v1.5 model. Note that, the raw text content can be uniquely identified by the triplet fields below: `table_name`, `column_name`, `primary_key`."
            },
            {
                "name": "pdf_id",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "The unique id of the PDF file stored in the relational database that the encoded text belongs to. This field can be used to quickly filter the search space."
            },
            {
                "name": "page_number",
                "dtype": "INT16",
                "description": "The page number in the PDF file that the encoded text belongs to (starting from 1). This field can be used to quickly filter the search space. Value -1 means unknown."
            },
            {
                "name": "table_name",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "Which table in the relational database is the text content from."
            },
            {
                "name": "column_name",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "Which column under `table_name` in the relational database is the text content from."
            },
            {
                "name": "primary_key",
                "dtype": "VARCHAR",
                "max_length": 512,
                "description": "The primary key value of `table_name` (or which row) in the relational database that the text content belongs to. Note that, the triplet (`table_name`, `column_name`, `primary_key`) can be used to construct a one-to-one mapping between the `text` field in the current vectorstore and the cell entry in the relational database."
            }
        ],
        "indexes": [
            {
                "field_name": "vector",
                "index_type": "FLAT",
                "metric_type": "COSINE"
            },
            {
                "field_name": "pdf_id",
                "index_type": "INVERTED"
            },
            {
                "field_name": "page_number",
                "index_type": "INVERTED"
            },
            {
                "field_name": "table_name",
                "index_type": "INVERTED"
            },
            {
                "field_name": "column_name",
                "index_type": "INVERTED"
            },
            {
                "field_name": "primary_key",
                "index_type": "INVERTED"
            }
        ]
    },
    {
        "collection_name": "image_clip_clip_vit_base_patch32",
        "modality": "image",
        "embed_type": "clip",
        "embed_model": "clip-vit-base-patch32",
        "description": "This collection is used to store the embeddings generated by the CLIP model `clip-vit-base-patch32` for all encodable images with bounding boxes in another relational database. The similary search is based on field `vector` with metric COSINE.",
        "fields": [
            {
                "name": "id",
                "dtype": "INT64",
                "is_primary": true,
                "description": "The unique id for each encoded image vector in this collection."
            },
            {
                "name": "vector",
                "dtype": "FLOAT_VECTOR",
                "dim": 512,
                "description": "The encoded vector of the image generated by the CLIP model."
            },
            {
                "name": "bbox",
                "dtype": "ARRAY",
                "etype": "INT16",
                "max_capacity": 4,
                "description": "The bounding box of the image in the format of [x0, y0, w, h], where (x0, y0) represents the coordinates of the top-left corner and (w, h) represents the width and height."
            },
            {
                "name": "pdf_id",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "The unique id of the PDF file stored in the relational database that the image `bbox` belongs to or is cropped from. This field can be used to quickly filter the search space."
            },
            {
                "name": "page_number",
                "dtype": "INT16",
                "description": "The page number in the PDF file that the image `bbox` belongs to or is cropped from (starting from 1). This field can be used to quickly filter the search space and must be a positive integer."
            },
            {
                "name": "table_name",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "Which table in the relational database is the image `bbox` from."
            },
            {
                "name": "column_name",
                "dtype": "VARCHAR",
                "max_length": 256,
                "description": "Which column under `table_name` in the relational database is the image `bbox` from."
            },
            {
                "name": "primary_key",
                "dtype": "VARCHAR",
                "max_length": 512,
                "description": "The primary key value of `table_name` (or which row) in the relational database that the image `bbox` belongs to. Note that, the triplet (`table_name`, `column_name`, `primary_key`) can be used to construct a one-to-one mapping between the image `bbox` in the current vectorstore and the cell entry in the relational database."
            }
        ],
        "indexes": [
            {
                "field_name": "vector",
                "index_type": "FLAT",
                "metric_type": "COSINE"
            },
            {
                "field_name": "pdf_id",
                "index_type": "INVERTED"
            },
            {
                "field_name": "page_number",
                "index_type": "INVERTED"
            },
            {
                "field_name": "table_name",
                "index_type": "INVERTED"
            },
            {
                "field_name": "column_name",
                "index_type": "INVERTED"
            },
            {
                "field_name": "primary_key",
                "index_type": "INVERTED"
            }
        ]
    }
]