# Scalar constants for image-token handling.
#
# IMAGE_TOKEN_LENGTH is deliberately not bound here: the name is assigned to
# the per-model dict further down in this module, so an integer binding at
# this point would be dead code that importers never observe. Likewise
# DEFAULT_IMAGE_PATCH_TOKEN is defined once, next to the other DEFAULT_IM*
# tokens, rather than twice.

# Negative, so it cannot collide with a non-negative vocabulary id.
# NOTE(review): presumably the placeholder id marking the image position in a
# token-id sequence -- confirm against the code that consumes it.
IMAGE_TOKEN_INDEX = -200

# Per-model scalar lengths; same values as IMAGE_TOKEN_LENGTH["minigpt4"] and
# IMAGE_TOKEN_LENGTH["shikra"].
MINIGPT4_IMAGE_TOKEN_LENGTH = 32
SHIKRA_IMAGE_TOKEN_LENGTH = 256

# NOTE(review): presumably tokenizer ids of the image start/end markers for
# Shikra -- verify against the tokenizer actually loaded.
SHIKRA_IMG_START_TOKEN = 32001
SHIKRA_IMG_END_TOKEN = 32002

# Number of image tokens per model, keyed by model name.
# For the CLIP ViT-L/14 entries the count is (image_side / patch_side) ** 2,
# written out as an expression so the derivation is visible.
IMAGE_TOKEN_LENGTH = {
    # CLIP ViT-L/14 @ 336px -> 24 x 24 patches = 576
    "llava-1.5": (336 // 14) ** 2,
    "minigpt4": 32,
    # CLIP ViT-L/14 @ 224px -> 16 x 16 patches = 256
    "shikra": (224 // 14) ** 2,
    "qwen-vl": 256,
}

# Textual image markers.
# The start/end pair is the same text that brackets <ImageHere> in the
# "shikra" entry of INSTRUCTION_TEMPLATE.
DEFAULT_IM_START_TOKEN = "<im_start>"
DEFAULT_IM_END_TOKEN = "<im_end>"

# NOTE(review): presumably the per-patch placeholder repeated once per image
# token -- confirm against the code that expands it.
DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"

# Text-only prompt templates, keyed by model name. "<question>" is the slot
# for the user's question.
#
# NOTE(review): the key set differs from INSTRUCTION_TEMPLATE, SYSTEM_MESSAGE
# and MODEL_PATHS -- "qwen-vl" is absent here (a lookup raises KeyError),
# while "instructblip", "lrv_instruct" and "internvl" appear only in this
# table. Confirm this is intentional.

# Shared by every model that uses the plain USER/ASSISTANT turn format.
_USER_ASSISTANT_NO_IMG = "USER: <question> ASSISTANT:"

INSTRUCTION_TEMPLATE_NO_IMG = {
    # NOTE(review): no space after "###Human:" here, unlike "lrv_instruct"
    # below and the minigpt4 entry of INSTRUCTION_TEMPLATE -- verify.
    "minigpt4": "###Human:<question> ###Assistant:",
    # Bare question, no role markers.
    "instructblip": "<question>",
    "lrv_instruct": "###Human: <question> ###Assistant:",
    "shikra": _USER_ASSISTANT_NO_IMG,
    "llava-1.5": _USER_ASSISTANT_NO_IMG,
    "internvl": _USER_ASSISTANT_NO_IMG,
}

# Prompt templates for inputs that include an image, keyed by model name.
# Each template carries two slots: "<ImageHere>" for the image and
# "<question>" for the user's question. The wrapper around the image slot is
# model-specific.
INSTRUCTION_TEMPLATE = {
    # Image wrapped in <Img>...</Img>.
    "minigpt4": "###Human: <Img><ImageHere></Img> <question> ###Assistant:",

    # Image wrapped in <im_start>...<im_end>.
    "shikra": "USER: <im_start><ImageHere><im_end> <question> ASSISTANT:",

    # Image slot with no wrapper.
    "llava-1.5": "USER: <ImageHere> <question> ASSISTANT:",

    # Image wrapped in <img>...</img>; no role markers and no separator
    # before the question.
    "qwen-vl": "<img><ImageHere></img><question>",
}

# System preamble per model, keyed by model name.

# Shared verbatim by three of the models. The two spaces after "assistant."
# are part of the string and are kept exactly as-is.
_CHAT_SYSTEM_MESSAGE = (
    "A chat between a curious user and an artificial intelligence assistant."
    "  The assistant gives helpful, detailed, and polite answers to the user's questions."
)

SYSTEM_MESSAGE = {
    "llava-1.5": _CHAT_SYSTEM_MESSAGE,
    "minigpt4": _CHAT_SYSTEM_MESSAGE,
    "shikra": _CHAT_SYSTEM_MESSAGE,
    "qwen-vl": "You are a helpful assistant.",
}

# Model location per model name.
# Every value is currently the literal placeholder string "path".
# NOTE(review): presumably meant to be replaced with real checkpoint
# locations before use -- confirm with the loading code.
MODEL_PATHS = {
    "llava-1.5": "path",
    "minigpt4": "path",
    "shikra": "path",
    "qwen-vl": "path",
}

