from typing import List, Optional

from pydantic import BaseModel, Field

# Kinds of chunk known to this module; passed as the ``enum`` entry of the
# chunk ``type`` field.
CHUNK_TYPE = [
    "text",
    "table",
    "figure",
]


class BaseChunk(BaseModel):
    # Metadata record describing one chunk extracted from a PDF.
    #
    # Documented with ``#`` comments rather than a class docstring on purpose:
    # pydantic copies a model's docstring into the generated JSON schema, and
    # the only schema change intended here is the ``bbox`` description fix.

    # Required: source document the chunk came from.
    filename: str = Field(..., description="path to PDF from which the chunk was extracted")

    # Required but nullable (``Optional`` annotation with ``...`` as default).
    # NOTE(review): ``enum`` is handed to ``Field`` as extra metadata; it
    # presumably does not restrict accepted values by itself — confirm against
    # the pydantic version in use.
    type: Optional[str] = Field(..., description="The type of the chunk", enum=CHUNK_TYPE)

    # Optional: defaults to None when the page is not known.
    page: Optional[int] = Field(default=None, description="The page number of the chunk")

    # Required: kept as a plain string, no datetime parsing is declared here.
    timestamp: str = Field(..., description="The timestamp of the chunk generation")

    # Optional: text payload, per the description only meaningful for text chunks.
    text: Optional[str] = Field(default=None, description="ONLY if type is text: put the text content of the chunk")

    bbox: Optional[tuple] = Field(
        default=None,
        # Adjacent string literals are joined at compile time, so source
        # indentation no longer leaks into the description (the previous
        # backslash continuation embedded a long run of spaces in it).
        # NOTE(review): the original wording was "chunk or figure"; "chunk" is
        # not a CHUNK_TYPE value, so "table" is assumed here, matching the
        # ``imagepath`` description — confirm.
        description=(
            "If type is table or figure: "
            "The bounding box coordinates of the chunk in the format "
            "[[x1, y1], [x2, y2], [x3, y3], [x4, y4]] with size 4"
        ),
    )

    # Optional: image location, per the description only for table/figure chunks.
    imagepath: Optional[str] = Field(
        default=None, description="ONLY if type is table or figure: put the path to the image"
    )


class CaptionChunk(BaseChunk):
    # A chunk record extended with a generated caption plus the model and
    # prompt that produced it. All three added fields are required.
    # (``#`` comments are used instead of a docstring so the generated JSON
    # schema stays exactly as before.)

    caption: str = Field(
        default=...,
        description="The caption of the chunk generated by an LLM Vision",
    )

    model: str = Field(
        default=...,
        description="The model used to generate the caption",
    )

    prompt: str = Field(
        default=...,
        description="The prompt used to generate the caption",
    )


class RagQA(BaseModel):
    # One question/answer record, together with how it was generated and the
    # chunk identifiers it refers to. Every field is required except ``type``.
    # (``#`` comments are used instead of a docstring so the generated JSON
    # schema stays exactly as before.)

    question: str = Field(
        default=...,
        description="The question of the QA",
    )

    # The only optional field: falls back to None when no taxonomy is given.
    type: Optional[str] = Field(
        default=None,
        description="Taxonomy of the question - following the documentation -",
    )

    method: str = Field(
        default=...,
        description="The method used to generate the question",
    )

    language: str = Field(
        default=...,
        description="The language of the question",
    )

    # Bare ``List``: the element type is left unconstrained.
    answer: List = Field(
        default=...,
        description="The answer to the question",
    )

    timestamp: str = Field(
        default=...,
        description="The timestamp of the QA generation",
    )

    reference: List[str] = Field(
        default=...,
        description="UUIDs of the chunks that needs to be retrieved to answer the question",
    )
