from typing import Union, Literal


# Dataset names (judging by the variable names, these identify the datasets
# on Hugging Face -- confirm against the loading code).
huggingface_eval_real_data_name = "VisOnlyQA_Eval_Real"
huggingface_eval_synthetic_data_name = "VisOnlyQA_Eval_Synthetic"
huggingface_train_data_name = "VisOnlyQA_Train"

# Evaluation splits. Every split name has the form "<category>__<task>".
visonlyqa_real_splits = [
    # geometry
    "geometry__triangle",
    "geometry__quadrilateral",
    "geometry__length",
    "geometry__angle",
    "geometry__area",
    "geometry__diameter_radius",
    # chemistry
    "chemistry__shape_single",
    "chemistry__shape_multi",
    # charts
    "charts__extraction",
    "charts__intersection",
]

visonlyqa_synthetic_splits = [
    # synthetic geometry
    "syntheticgeometry__triangle",
    "syntheticgeometry__quadrilateral",
    "syntheticgeometry__length",
    "syntheticgeometry__angle",
    "syntheticgeometry__area",
    # 3d
    "3d__size",
    "3d__angle",
]

# Answer-option sets that recur across splits. They are kept as tuples and
# copied with list(...) for every entry below, so each split owns its own
# independent list object.
_TRUE_FALSE_OPTIONS = ("True", "False")
_THREE_CHOICE_OPTIONS = ("a", "b", "c")
_FOUR_CHOICE_OPTIONS = ("a", "b", "c", "d")
_FIVE_CHOICE_OPTIONS = ("a", "b", "c", "d", "e")

# Maps each evaluation split to (response type, list of answer options).
# The response type is either "single_answer" or "multiple_answers".
visonlyqa_response_type_dir: dict[str, tuple[str, list[str]]] = {
    # --- Real: geometry ---
    "geometry__triangle": ("single_answer", list(_TRUE_FALSE_OPTIONS)),
    "geometry__quadrilateral": ("single_answer", list(_TRUE_FALSE_OPTIONS)),
    "geometry__length": ("single_answer", list(_FIVE_CHOICE_OPTIONS)),
    "geometry__angle": ("single_answer", list(_FIVE_CHOICE_OPTIONS)),
    "geometry__area": ("single_answer", list(_FIVE_CHOICE_OPTIONS)),
    "geometry__diameter_radius": ("single_answer", list(_TRUE_FALSE_OPTIONS)),
    # --- Real: chemistry ---
    "chemistry__shape_single": ("single_answer", list(_TRUE_FALSE_OPTIONS)),
    # The only split that uses the "multiple_answers" response type.
    "chemistry__shape_multi": ("multiple_answers", list(_FOUR_CHOICE_OPTIONS)),
    # --- Real: charts ---
    "charts__extraction": ("single_answer", list(_FIVE_CHOICE_OPTIONS)),
    "charts__intersection": ("single_answer", list(_TRUE_FALSE_OPTIONS)),
    # --- Synthetic: 3d ---
    "3d__size": ("single_answer", list(_THREE_CHOICE_OPTIONS)),
    "3d__angle": ("single_answer", list(_FIVE_CHOICE_OPTIONS)),
    # --- Synthetic: geometry ---
    "syntheticgeometry__triangle": ("single_answer", list(_TRUE_FALSE_OPTIONS)),
    "syntheticgeometry__quadrilateral": ("single_answer", list(_TRUE_FALSE_OPTIONS)),
    "syntheticgeometry__length": ("single_answer", list(_FIVE_CHOICE_OPTIONS)),
    "syntheticgeometry__angle": ("single_answer", list(_FIVE_CHOICE_OPTIONS)),
    "syntheticgeometry__area": ("single_answer", list(_FIVE_CHOICE_OPTIONS)),
}

# Training (fine-tuning) splits.
# The two lists have the same length and appear to be index-aligned: entry i of
# "corresponding_real_splits" names the real split paired with entry i of
# "finetuning_splits", with None where no real split is listed (the 3d splits).
finetuning_splits_dict: dict[
    Literal["finetuning_splits", "corresponding_real_splits"],
    list[Union[str, None]],
] = {
    "finetuning_splits": [
        "syntheticgeometry__triangle",
        "syntheticgeometry__quadrilateral",
        "syntheticgeometry__length",
        "syntheticgeometry__angle",
        "syntheticgeometry__area",
        "3d__size",
        "3d__angle",
    ],
    "corresponding_real_splits": [
        "geometry__triangle",
        "geometry__quadrilateral",
        "geometry__length",
        "geometry__angle",
        "geometry__area",
        None,
        None,
    ],
}
# Same list object as finetuning_splits_dict["finetuning_splits"] (an alias, not a copy).
train_data_splits = finetuning_splits_dict["finetuning_splits"]

# Models
open_models_list = [
    "microsoft/Phi-3.5-vision-instruct",
    # "xgen-mm-phi3-interleave-r-v1.5" is currently left out of this list; it
    # still has a display name in convert_model_name below.
    "llava_next_llama3",
    "llava_next_yi_34b",
    "molmo-7B-D-0924",
    "molmo-72B-0924",
    "Llama-3.2-11B-Vision-Instruct",
    "Llama-3.2-90B-Vision-Instruct",
    "OpenGVLab/InternVL2-4B",
    "OpenGVLab/InternVL2-8B",
    "OpenGVLab/InternVL2-26B",
    "OpenGVLab/InternVL2-40B",
    "OpenGVLab/InternVL2-Llama3-76B",
]
proprietary_models_list = [
    "claude-3-5-sonnet-20240620",
    "gpt-4o-mini-2024-07-18",
    "gpt-4o-2024-08-06",
    "gemini-1.5-flash-002",
    "gemini-1.5-pro-002",
]
# All models: the open ones first, followed by the proprietary ones.
models_list = [*open_models_list, *proprietary_models_list]

# Short display name for each model identifier.
convert_model_name: dict[str, str] = {
    # Phi / xGen / LLaVA-Next
    "microsoft/Phi-3.5-vision-instruct": "Phi-3.5-vision",
    "xgen-mm-phi3-interleave-r-v1.5": "xGen-MM",
    "llava_next_llama3": "LLaVA-Next 8B",
    "llava_next_yi_34b": "LLaVA-Next 34B",
    # MolMo
    "molmo-7B-D-0924": "MolMo 7B-D",
    "molmo-72B-0924": "MolMo 72B",
    # Llama 3.2
    "Llama-3.2-11B-Vision-Instruct": "Llama 3.2 11B",
    "Llama-3.2-90B-Vision-Instruct": "Llama 3.2 90B",
    # InternVL2
    "OpenGVLab/InternVL2-4B": "InternVL2-4B",
    "OpenGVLab/InternVL2-8B": "InternVL2-8B",
    "OpenGVLab/InternVL2-26B": "InternVL2-26B",
    "OpenGVLab/InternVL2-40B": "InternVL2-40B",
    "OpenGVLab/InternVL2-Llama3-76B": "InternVL2-76B",
    # Claude
    "claude-3-5-sonnet-20240620": "Claude 3.5 Sonnet",
    # GPT-4o
    "gpt-4o-mini-2024-07-18": "GPT-4o-mini",
    "gpt-4o-2024-08-06": "GPT-4o",
    # Gemini 1.5
    "gemini-1.5-flash-002": "Gemini 1.5 Flash",
    "gemini-1.5-pro-002": "Gemini 1.5 Pro",
}


finetuning_base_models_list = [
    "microsoft/Phi-3.5-vision-instruct",
    "OpenGVLab/InternVL2-4B",
    "OpenGVLab/InternVL2-8B",
]

# base model -> {fine-tuning split -> path of the fine-tuned model}.
# Every path has the form "<log dir>/<model name>_<split>_<suffix>", so each
# inner dict is expanded from a (split, suffix) table. The suffix looks like
# the date/time of the run -- confirm before relying on that.
base_model_to_finetuned_model_dict: dict[str, dict[str, str]] = {
    "OpenGVLab/InternVL2-4B": {
        split: f"finetuning_results/internvl_finetuning_log/InternVL2-4B_{split}_{suffix}"
        for split, suffix in (
            ("syntheticgeometry__triangle", "20241106_213204"),
            ("syntheticgeometry__quadrilateral", "20241106_222740"),
            ("syntheticgeometry__length", "20241107_011728"),
            ("syntheticgeometry__angle", "20241106_232458"),
            ("syntheticgeometry__area", "20241107_002108"),
            ("3d__size", "20241106_193554"),
            ("3d__angle", "20241106_203320"),
        )
    },
    "OpenGVLab/InternVL2-8B": {
        split: f"finetuning_results/internvl_finetuning_log/InternVL2-8B_{split}_{suffix}"
        for split, suffix in (
            ("syntheticgeometry__triangle", "20241106_200045"),
            ("syntheticgeometry__quadrilateral", "20241106_213050"),
            ("syntheticgeometry__length", "20241107_020956"),
            ("syntheticgeometry__angle", "20241106_230117"),
            ("syntheticgeometry__area", "20241107_003601"),
            ("3d__size", "20241106_165308"),
            ("3d__angle", "20241106_182532"),
        )
    },
    "microsoft/Phi-3.5-vision-instruct": {
        split: f"finetuning_results/phi35v_finetuning_log/Phi-3.5-vision-instruct_{split}_{suffix}"
        for split, suffix in (
            ("syntheticgeometry__triangle", "2024-11-05_10-47-55"),
            ("syntheticgeometry__quadrilateral", "2024-11-05_11-50-56"),
            ("syntheticgeometry__length", "2024-11-05_12-53-35"),
            ("syntheticgeometry__angle", "2024-11-05_14-58-51"),
            ("syntheticgeometry__area", "2024-11-05_13-55-38"),
            ("3d__size", "2024-11-05_16-02-56"),
            ("3d__angle", "2024-11-05_16-51-11"),
        )
    },
}


# Image placeholder token per model family. The keys look like substrings of
# the model identifiers used elsewhere in this file (e.g. "InternVL" in
# "OpenGVLab/InternVL2-4B") -- confirm how callers match them.
model_image_tokens_dict: dict[str, str] = {
    "InternVL": "<image>",  # single generic image tag
    "Phi-3.5": "<|image_1|>",  # numbered image tag
}


def convert_split_name(split_name: str) -> str:
    """Return the display label for a split name.

    Only the text after the last ``"__"`` separator is considered (the whole
    string when there is no separator). A few task names have hand-written
    labels; every other name has its underscores replaced by spaces and is
    title-cased.

    Args:
        split_name: Split identifier such as ``"geometry__diameter_radius"``.

    Returns:
        The label, e.g. ``"Diameter"``, ``"Shape (s)"`` or ``"Triangle"``.
    """
    # Labels that plain title-casing would not produce.
    special_labels = {
        "diameter_radius": "Diameter",
        "shape_single": "Shape (s)",
        "shape_multi": "Shape (m)",
    }

    # Drop any "<category>__" prefix and keep the trailing task name.
    *_, task = split_name.split("__")

    label = special_labels.get(task)
    if label is not None:
        return label

    return " ".join(task.split("_")).title()

# Display labels for each group of splits, in the same order as the source lists.
# NOTE(review): convert_split_name keeps only the text after the last "__", so
# "syntheticgeometry__angle" and "3d__angle" both become "Angle" in the
# synthetic and fine-tuning label lists -- confirm that consumers can cope
# with the duplicate label.
eval_real_splits_capitalized = list(map(convert_split_name, visonlyqa_real_splits))
eval_synthetic_splits_capitalized = list(map(convert_split_name, visonlyqa_synthetic_splits))
finetuning_splits_capitalized = list(map(convert_split_name, finetuning_splits_dict["finetuning_splits"]))
