import logging
import sys
import os

from utils_api import (
    encode_image,
    OPENAI_MODELS,
    GOOGLE_MODELS,
    MIMO_MODELS,
)

from utils import (
    convert_time,
    convert_angle,
    sample_frame,
    get_fps,
)

from google.genai import types
from google import genai


def tool_sample_frame(args, example, start, end, angle):
    """
    Collect sampled frame files for one camera angle, labelled with ids.

    The frame directory is
    ``args.dirpath_frame / example["sequence_id"] / <angle dir>``, where the
    angle directory name is obtained from ``convert_angle(angle, "string2file")``.
    ``start``, ``end`` and ``args.max_frames`` are forwarded unchanged to
    ``sample_frame`` and ``get_fps``.

    Returns:
        A pair ``(filepaths_and_ids, fps)``. ``filepaths_and_ids`` is a list of
        ``[path_as_str, frame_id]`` entries; ``frame_id`` is the integer file
        stem minus ``int(example["video"]["start"])``, zero-padded to four
        digits. ``fps`` is whatever ``get_fps`` reports for the same arguments.
    """

    angle_dirname = convert_angle(angle, "string2file")
    frame_dir = args.dirpath_frame / example["sequence_id"] / angle_dirname

    sampled = sample_frame(frame_dir, start, end, args.max_frames)
    fps = get_fps(frame_dir, start, end, args.max_frames)

    # File stems are parsed as integers and shifted so ids are relative to
    # the video start of this example.
    labelled = [
        [str(path), f"{(int(path.stem) - int(example['video']['start'])):04d}"]
        for path in sampled
    ]

    return labelled, fps


def call_function_openai(
    args, example, template_components, instructions, tool_name
) -> list:
    """
    Build the message content for one tool invocation using ``input_text`` /
    ``input_image`` items.

    Supported ``tool_name`` values:
      * ``"sample_frame"``: intro text, then a label and a base64 JPEG data
        URL for every frame sampled from the "center" angle over the whole
        video range, then the fps notice and the affix.
      * ``"check_instruction"``: instruction template with ``{graph}`` filled
        from ``instructions["dot"]``, then the affix.
      * ``"check_final_picture"``: template text, the image at
        ``instructions["parts"]`` as a data URL, then the affix.
      * ``"finish"``: answer template with ``{question}`` filled in.
    Any other name is logged as an error and yields empty output.

    Returns:
        ``(content, content_text)``: the list of content items and a
        newline-terminated plain-text transcript in which images are
        represented by their file paths.
    """

    content = []
    transcript = []

    def say(text):
        # A text item and its transcript line always travel together.
        content.append({"type": "input_text", "text": text})
        transcript.append(text + "\n")

    def show(image_path):
        data_url = f"data:image/jpeg;base64,{encode_image(image_path)}"
        content.append({"type": "input_image", "image_url": data_url})

    if tool_name == "sample_frame":
        # user-action intro message with times relative to the video start
        intro = template_components["tools"]["sample_frame"]
        video = example["video"]
        rel_start = convert_time(video["start"] - video["start"], "second2string")
        rel_end = convert_time(video["end"] - video["start"], "second2string")
        say(intro.replace("{start}", rel_start).replace("{end}", rel_end))

        frames, fps = tool_sample_frame(
            args, example, video["start"], video["end"], "center"
        )
        for frame_path, frame_id in frames:
            label = f"Frame {frame_id}"
            content.append({"type": "input_text", "text": label})
            show(frame_path)
            transcript.append(f"{label}: <{str(frame_path)}>\n")

        say(template_components["tools"]["fps_info"].replace("{fps}", f"{fps:.2f}"))
        say(template_components["others"]["affix"])
    elif tool_name == "check_instruction":
        # the instruction is assumed to be available in text form
        graph_prompt = template_components["tools"]["check_instruction"]
        say(graph_prompt.replace("{graph}", instructions["dot"]))
        say(template_components["others"]["affix"])
    elif tool_name == "check_final_picture":
        say(template_components["tools"]["check_final_picture"])

        picture_path = str(instructions["parts"])
        show(picture_path)
        transcript.append(picture_path + "\n")

        say(template_components["others"]["affix"])
    elif tool_name == "finish":
        answer_prompt = template_components["user"]["answer"]
        say(answer_prompt.replace("{question}", example["question"]))
    else:
        logging.error(f"Undefined tool: {tool_name}")

    return content, "".join(transcript)


def call_function_google(
    args, example, template_components, instructions, tool_name
) -> list:
    """
    sample_frame: uniform sample from each video
    No zoom_in

    """
    client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

    parts, parts_text = [], ""
    uploaded_files = []

    match tool_name:
        case "sample_frame":
            # add user-action intro message
            template_prefix = template_components["tools"]["sample_frame"]
            start = convert_time(
                example["video"]["start"] - example["video"]["start"], "second2string"
            )
            end = convert_time(
                example["video"]["end"] - example["video"]["start"], "second2string"
            )
            template_prefix = template_prefix.replace("{start}", start).replace(
                "{end}", end
            )
            parts.append(types.Part(text=template_prefix))
            parts_text += template_prefix + "\n"

            filepaths_and_ids, fps = tool_sample_frame(
                args,
                example,
                example["video"]["start"],
                example["video"]["end"],
                "center",
            )
            for filepath, idx in filepaths_and_ids:
                parts.append(types.Part(text=f"Frame {idx}"))
                file = client.files.upload(file=filepath)
                part_from_uri = types.Part.from_uri(
                    file_uri=file.uri,
                    mime_type=file.mime_type,
                )
                parts.append(part_from_uri)
                parts_text += f"Frame {idx}: <{str(filepath)}>\n"
                uploaded_files.append(file)

            template_fps = template_components["tools"]["fps_info"]
            template_fps = template_fps.replace("{fps}", f"{fps:.2f}")
            parts.append(types.Part(text=template_fps))
            parts_text += template_fps + "\n"

            template_affix = template_components["others"]["affix"]
            parts.append(types.Part(text=template_affix))
            parts_text += template_affix + "\n"
        case "check_instruction":
            # assume the mode is text
            template = template_components["tools"]["check_instruction"]
            template = template.replace("{graph}", instructions["dot"])
            parts.append(types.Part(text=template))
            parts_text += template + "\n"

            template_affix = template_components["others"]["affix"]
            parts.append(types.Part(text=template_affix))
            parts_text += template_affix + "\n"
        case "check_final_picture":
            template = template_components["tools"]["check_final_picture"]
            parts.append(types.Part(text=template))
            parts_text += template + "\n"

            file = client.files.upload(file=instructions["parts"])
            part_from_uri = types.Part.from_uri(
                file_uri=file.uri,
                mime_type=file.mime_type,
            )
            parts_text += str(instructions["parts"]) + "\n"
            uploaded_files.append(file)

            template_affix = template_components["others"]["affix"]
            parts.append(types.Part(text=template_affix))
            parts_text += template_affix + "\n"
        case "finish":
            template = template_components["user"]["answer"]
            template = template.replace("{question}", example["question"])
            parts.append(types.Part(text=template))
            parts_text += template + "\n"
        case _:
            logging.error(f"Undefined tool: {tool_name}")

    return parts, parts_text, uploaded_files


def call_function_mimo(
    args, example, template_components, instructions, tool_name
) -> list:
    """
    Build the message content for one tool invocation using ``text`` /
    ``image_url`` items.

    Supported ``tool_name`` values:
      * ``"sample_frame"``: intro text, then a label and a base64 JPEG data
        URL for every frame sampled from the "center" angle over the whole
        video range, then the fps notice and the affix.
      * ``"check_instruction"``: instruction template with ``{graph}`` filled
        from ``instructions["dot"]``, then the affix.
      * ``"check_final_picture"``: template text, the image at
        ``instructions["parts"]`` as a data URL, then the affix.
      * ``"finish"``: answer template with ``{question}`` filled in.
    Any other name is logged as an error and yields empty output.

    Returns:
        ``(content, content_text)``: the list of content items and a
        newline-terminated plain-text transcript in which images are
        represented by their file paths.
    """

    items = []
    log_lines = []

    def push_text(message):
        # Every plain-text item is mirrored verbatim into the transcript.
        items.append({"type": "text", "text": message})
        log_lines.append(message + "\n")

    def push_image(path):
        url = f"data:image/jpeg;base64,{encode_image(path)}"
        items.append({"type": "image_url", "image_url": {"url": url}})

    if tool_name == "sample_frame":
        # user-action intro message with times relative to the video start
        prefix = template_components["tools"]["sample_frame"]
        clip = example["video"]
        start_str = convert_time(clip["start"] - clip["start"], "second2string")
        end_str = convert_time(clip["end"] - clip["start"], "second2string")
        push_text(prefix.replace("{start}", start_str).replace("{end}", end_str))

        sampled, fps = tool_sample_frame(
            args, example, clip["start"], clip["end"], "center"
        )
        for path, frame_id in sampled:
            caption = f"Frame {frame_id}"
            items.append({"type": "text", "text": caption})
            push_image(path)
            log_lines.append(f"{caption}: <{str(path)}>\n")

        fps_note = template_components["tools"]["fps_info"]
        push_text(fps_note.replace("{fps}", f"{fps:.2f}"))
        push_text(template_components["others"]["affix"])
    elif tool_name == "check_instruction":
        # the instruction is assumed to be available in text form
        graph_note = template_components["tools"]["check_instruction"]
        push_text(graph_note.replace("{graph}", instructions["dot"]))
        push_text(template_components["others"]["affix"])
    elif tool_name == "check_final_picture":
        push_text(template_components["tools"]["check_final_picture"])

        final_picture = str(instructions["parts"])
        push_image(final_picture)
        log_lines.append(final_picture + "\n")

        push_text(template_components["others"]["affix"])
    elif tool_name == "finish":
        question_note = template_components["user"]["answer"]
        push_text(question_note.replace("{question}", example["question"]))
    else:
        logging.error(f"Undefined tool: {tool_name}")

    return items, "".join(log_lines)


def call_function(args, example, template_components, instructions, tool_name):
    """
    Route a tool invocation to the builder matching ``args.model_id``.

    The model id is checked against ``OPENAI_MODELS``, ``GOOGLE_MODELS`` and
    ``MIMO_MODELS`` in that order. If it belongs to none of them the process
    exits via ``sys.exit`` with an explanatory message.

    Returns:
        ``(output, output_text, uploaded_files)``. ``uploaded_files`` is only
        populated by the Google builder and is an empty list otherwise.
    """
    model_id = args.model_id
    forwarded = (args, example, template_components, instructions, tool_name)
    uploaded_files = []

    if model_id in OPENAI_MODELS:
        output, output_text = call_function_openai(*forwarded)
    elif model_id in GOOGLE_MODELS:
        output, output_text, uploaded_files = call_function_google(*forwarded)
    elif model_id in MIMO_MODELS:
        output, output_text = call_function_mimo(*forwarded)
    else:
        sys.exit(f"Undefined (call function) {model_id}")

    return output, output_text, uploaded_files
