import logging
import yaml
from PIL import Image
from pathlib import Path
import json
import sys
import os

from utils_api import (
    encode_image,
    OPENAI_MODELS,
    ANTHROPIC_MODELS,
    GOOGLE_MODELS,
    QWEN_MODELS,
    MIMO_MODELS,
    INTERNVL_MODELS,
)

from utils import (
    convert_time,
    convert_angle,
    sample_frame,
    # get_fps,
)

from google import genai
from google.genai import types


def load_tools_openai(tools_original):
    """Convert project tool specifications into OpenAI function-tool dicts.

    Args:
        tools_original: Iterable of dicts with the keys ``name``,
            ``description``, ``args`` and ``required``. ``args`` maps an
            argument name to a dict holding ``type`` and ``description`` and,
            optionally, ``enum`` and/or ``items``. ``args`` and ``required``
            may be empty or ``None`` for tools without arguments.

    Returns:
        A list with one flat function-tool dict per input tool. Every tool is
        emitted with ``strict`` set to ``True`` and ``additionalProperties``
        set to ``False``.
    """
    converted = []
    for spec in tools_original:
        properties = {}
        for arg_name, arg_spec in (spec["args"] or {}).items():
            schema = {
                "type": arg_spec["type"],
                "description": arg_spec["description"],
            }
            # Optional JSON-schema keywords are forwarded only when present.
            for keyword in ("enum", "items"):
                if keyword in arg_spec:
                    schema[keyword] = arg_spec[keyword]
            properties[arg_name] = schema

        function_tool = {
            "type": "function",
            "name": spec["name"],
            "description": spec["description"],
            "strict": True,
            "parameters": {
                "type": "object",
                "properties": properties,
                # A missing/empty "required" entry becomes an empty list.
                "required": spec["required"] or [],
                "additionalProperties": False,
            },
        }
        converted.append(function_tool)
    return converted


def load_tools_anthropic(tools_original):
    """Convert project tool specifications into Anthropic tool dicts.

    Args:
        tools_original: Iterable of dicts with the keys ``name``,
            ``description``, ``args`` and ``required``. ``args`` maps an
            argument name to a dict holding ``type`` and ``description`` and,
            optionally, ``enum`` and/or ``items``. ``args`` and ``required``
            may be empty or ``None`` for tools without arguments.

    Returns:
        A list with one dict per tool, each holding ``name``, ``description``
        and a JSON-schema ``input_schema``.
    """
    tools = []
    for tool_original in tools_original:
        properties = {}
        for arg_name, arg_original in (tool_original["args"] or {}).items():
            schema = {
                "type": arg_original["type"],
                "description": arg_original["description"],
            }
            # Forward optional JSON-schema keywords. "items" is included so
            # array arguments carry their element schema, matching the other
            # provider loaders in this module.
            for keyword in ("enum", "items"):
                if keyword in arg_original:
                    schema[keyword] = arg_original[keyword]
            properties[arg_name] = schema

        tools.append(
            {
                "name": tool_original["name"],
                "description": tool_original["description"],
                "input_schema": {
                    "type": "object",
                    "properties": properties,
                    # A missing/empty "required" entry becomes an empty list.
                    "required": tool_original["required"] or [],
                },
            }
        )
    return tools


def load_tools_google(tools_original):
    """Convert project tool specifications into a google-genai ``Tool``.

    Args:
        tools_original: Iterable of dicts with the keys ``name``,
            ``description``, ``args`` and ``required``. ``args`` maps an
            argument name to a dict holding ``type`` and ``description`` and,
            optionally, ``enum`` and/or ``items``. ``args`` and ``required``
            may be empty or ``None`` for tools without arguments.

    Returns:
        A one-element list containing a ``genai.types.Tool`` whose
        ``function_declarations`` are the converted tools.
    """
    declarations = []
    for spec in tools_original:
        properties = {}
        for arg_name, arg_spec in (spec["args"] or {}).items():
            schema = {
                "type": arg_spec["type"],
                "description": arg_spec["description"],
            }
            # Optional JSON-schema keywords are forwarded only when present.
            for keyword in ("enum", "items"):
                if keyword in arg_spec:
                    schema[keyword] = arg_spec[keyword]
            properties[arg_name] = schema

        declaration = {
            "name": spec["name"],
            "description": spec["description"],
            "parameters": {
                "type": "object",
                "properties": properties,
                # A missing/empty "required" entry becomes an empty list.
                "required": spec["required"] or [],
            },
        }
        declarations.append(declaration)

    # All declarations are bundled into a single Tool object.
    return [genai.types.Tool(function_declarations=declarations)]


def load_tools_qwen(tools_original):
    """Convert project tool specifications into chat-completions style tools.

    The result nests each definition under a ``function`` key. ``load_tools``
    uses this loader for the Qwen, MiMo and InternVL model families.

    Args:
        tools_original: Iterable of dicts with the keys ``name``,
            ``description``, ``args`` and ``required``. ``args`` maps an
            argument name to a dict holding ``type`` and ``description`` and,
            optionally, ``enum`` and/or ``items``. ``args`` and ``required``
            may be empty or ``None`` for tools without arguments.

    Returns:
        A list with one ``{"type": "function", "function": {...}}`` dict per
        input tool.
    """
    converted = []
    for spec in tools_original:
        properties = {}
        for arg_name, arg_spec in (spec["args"] or {}).items():
            schema = {
                "type": arg_spec["type"],
                "description": arg_spec["description"],
            }
            # Optional JSON-schema keywords are forwarded only when present.
            for keyword in ("enum", "items"):
                if keyword in arg_spec:
                    schema[keyword] = arg_spec[keyword]
            properties[arg_name] = schema

        function_spec = {
            "name": spec["name"],
            "description": spec["description"],
            "parameters": {
                "type": "object",
                "properties": properties,
                # A missing/empty "required" entry becomes an empty list.
                "required": spec["required"] or [],
            },
        }
        converted.append({"type": "function", "function": function_spec})

    return converted


def load_tools(args):
    logging.info("Load tools ...")

    with open(args.filepath_tool, "r") as f:
        tools_original = yaml.safe_load(f)

    tools = []
    match args.model_id:
        case x if x in OPENAI_MODELS:
            tools = load_tools_openai(tools_original)
        case x if x in ANTHROPIC_MODELS:
            tools = load_tools_anthropic(tools_original)
        case x if x in GOOGLE_MODELS:
            tools = load_tools_google(tools_original)
        case x if x in QWEN_MODELS + MIMO_MODELS + INTERNVL_MODELS:
            tools = load_tools_qwen(tools_original)
        case _:
            logging.error(f"Uudefined (load_tools) {args.model_id}")

    return tools


def tool_sample_frame(args, example, tool_args):
    """
    Sample frames from the recording of a specified angle within a time range.

    Args:
        args: Object providing ``dirpath_frame`` (root directory of the frame
            images) and ``max_frames`` (forwarded to ``sample_frame``;
            presumably the cap on returned frames, confirm in ``utils``).
        example: Dict providing ``sequence_id`` and ``video["start"]``; the
            latter is added to the requested times to address frames on disk.
        tool_args: Tool-call arguments with ``time_range`` (two time strings,
            start and end) and ``angle``.

    Returns:
        A tuple ``(filepaths_and_ids, fps)``. ``filepaths_and_ids`` is a list
        of ``[path, frame_id]`` pairs, where ``frame_id`` is the frame file's
        integer stem minus ``int(example["video"]["start"])``, zero-padded to
        four digits. ``fps`` is the number of returned frames divided by the
        requested duration (a zero-length range counts as duration 1).

    Raises:
        Exception: Any error raised while parsing ``time_range`` is logged
            together with ``tool_args`` and then re-raised.
    """

    try:
        start = (
            convert_time(tool_args["time_range"][0], "string2second")
            + example["video"]["start"]
        )
        end = (
            convert_time(tool_args["time_range"][1], "string2second")
            + example["video"]["start"]
        )
    except Exception:
        logging.exception("Error in tool_sample_frame")
        logging.error(f"{tool_args=}")
        # Without a valid range nothing below can work; propagate the real
        # error instead of failing later on unbound ``start``/``end``.
        raise

    dirpath = (
        args.dirpath_frame
        / example["sequence_id"]
        / convert_angle(tool_args["angle"], "string2file")
    )
    filepaths_sampled_frames = sample_frame(
        dirpath,
        start,
        end,
        args.max_frames,
    )

    video_start = int(example["video"]["start"])
    filepaths_and_ids = [
        # Frame ids are reported relative to the start of the example.
        [str(filepath_frame), f"{(int(filepath_frame.stem) - video_start):04d}"]
        for filepath_frame in filepaths_sampled_frames
    ]

    # fps: a zero-length range is treated as lasting one unit of time
    duration = 1 if start == end else end - start
    fps = len(filepaths_and_ids) / duration

    return filepaths_and_ids, fps


def tool_zoom_in(args, example, tool_args):
    """
    Crop one frame of a specified angle to a normalized bounding box.

    Args:
        args: Object providing ``dirpath_frame`` (root directory of the frame
            images), ``dirpath_intermediate_output`` (where the crop is
            written) and ``angle`` (used in the output file name).
        example: Dict providing ``sequence_id`` and ``video["start"]``; the
            latter is added to ``frame_id`` to locate the frame file.
        tool_args: Tool-call arguments with ``frame_id``, ``angle`` and
            ``bounding_box`` (``[x1, y1, x2, y2]`` with values in ``[0, 1]``).

    Returns:
        The path of the saved PNG crop as a string. When the bounding box is
        invalid, a human-readable message is returned instead: it starts with
        ``"Coordinates"`` for out-of-range values and with ``"Please"`` for an
        empty or inverted box.
    """
    frame_id = tool_args["frame_id"]
    x1, y1, x2, y2 = tool_args["bounding_box"]

    # Validate on floats; the raw values are kept for the output file name.
    fx1, fy1, fx2, fy2 = (float(value) for value in (x1, y1, x2, y2))
    if any(value < 0 or 1 < value for value in (fx1, fy1, fx2, fy2)):
        return "Coordinates for normalized bounding box should be between 0 and 1."
    # ">=" also rejects inverted boxes, which have no positive area either.
    if fx1 >= fx2 or fy1 >= fy2:
        return "Please select non-zero positive area for bounding box."

    filepath_frame = (
        args.dirpath_frame
        / example["sequence_id"]
        / convert_angle(tool_args["angle"], "string2file")
        / f"{int(frame_id)+int(example['video']['start'])}.png"
    )
    # The context manager releases the source file handle after cropping.
    with Image.open(filepath_frame) as image:
        width, height = image.size

        left = int(fx1 * width)
        right = int(fx2 * width)
        top = int(fy1 * height)
        bottom = int(fy2 * height)

        image_cropped = image.crop((left, top, right, bottom))

    sequence_id = example["sequence_id"]
    # NOTE(review): the file name uses args.angle while the frame is read from
    # tool_args["angle"]; confirm whether this labelling is intended.
    angle = args.angle
    resolution = args.dirpath_frame.name
    color = args.dirpath_frame.parent.name
    box = f"{x1}-{y1}-{x2}-{y2}"
    filepath_output_image_cropped = Path(
        args.dirpath_intermediate_output
        / "cropped_image"
        / f"{color}_{resolution}_{sequence_id}_{angle}_{frame_id}_{box}.png"
    )
    # exist_ok avoids a failure when the directory is created concurrently.
    filepath_output_image_cropped.parent.mkdir(parents=True, exist_ok=True)
    image_cropped.save(filepath_output_image_cropped, format="png")

    return str(filepath_output_image_cropped)


def call_function_openai(
    args, example, template_components, instructions, response
) -> list:
    messages, messages_text = [], []

    tool_name = response.name
    tool_args = json.loads(response.arguments)
    tool_call_id = response.call_id

    match tool_name:
        case "sample_frame":
            # add user-action intro message
            messages.append(
                {
                    "type": "function_call_output",
                    "call_id": tool_call_id,
                    "output": template_components["tools"]["sample_frame"],
                }
            )
            messages_text.append(
                ["function_call_output", template_components["tools"]["sample_frame"]]
            )

            # TODO
            # call function
            content, filepaths_text = [], ""
            filepaths_and_ids, fps = tool_sample_frame(args, example, tool_args)  # todo
            for filepath, idx in filepaths_and_ids:
                content.append(
                    {
                        "type": "input_text",
                        "text": f"Frame {idx}",
                    }
                )
                content.append(
                    {
                        "type": "input_image",
                        "image_url": f"data:image/jpeg;base64,{encode_image(filepath)}",
                    }
                )
                filepaths_text += f"Frame {idx}: <{filepath}>\n"

            if fps == 1:
                template = template_components["tools"]["fps_info"]
            else:
                template = (
                    template_components["tools"]["change_of_fps"]
                    .replace("{max_frames}", str(args.max_frames))
                    .replace("{fps}", f"{fps:.1f}")
                )
            content.append({"type": "input_text", "text": template})
            filepaths_text += template + "\n"

            # add function_output as user's message because of images
            messages.append({"role": "user", "content": content})
            messages_text.append(["user", filepaths_text])

        case "zoom_in":
            messages.append(
                {
                    "type": "function_call_output",
                    "call_id": tool_call_id,
                    "output": template_components["tools"]["zoom_in"],
                }
            )
            messages_text.append(
                ["function_call_output", template_components["tools"]["zoom_in"]]
            )

            # TODO
            filepath = tool_zoom_in(args, example, tool_args)
            content = [
                {
                    "type": "input_image",
                    "image_url": f"data:image/jpeg;base64,{encode_image(filepath)}",
                }
            ]

            messages.append({"role": "user", "content": content})
            messages_text.append(["user", filepath])

        case "check_instruction":
            if tool_args["mode"] == "text":
                output = template_components["tools"]["check_instruction"][
                    "text"
                ].replace("{graph}", instructions["dot"])
                messages.append(
                    {
                        "type": "function_call_output",
                        "call_id": tool_call_id,
                        "output": output,
                    }
                )
                messages_text.append(["function_call_output", output])

            elif tool_args["mode"] == "image":
                messages.append(
                    {
                        "type": "function_call_output",
                        "call_id": tool_call_id,
                        "output": template_components["tools"]["check_instruction"][
                            "image"
                        ],
                    }
                )
                messages_text.append(
                    [
                        "function_call_output",
                        template_components["tools"]["check_instruction"]["image"],
                    ]
                )

                content = [
                    {
                        "type": "input_image",
                        "image_url": f"data:image/jpeg;base64,{encode_image(str(instructions['dag']))}",
                    }
                ]
                messages.append({"role": "user", "content": content})
                messages_text.append(["user", str(instructions["dag"])])

            else:
                logging.error(f"Undefined mode: {tool_args['mode']}")
        case "check_final_picture":
            messages.append(
                {
                    "type": "function_call_output",
                    "call_id": tool_call_id,
                    "output": template_components["tools"]["check_final_picture"],
                }
            )
            messages_text.append(
                [
                    "function_call_output",
                    template_components["tools"]["check_final_picture"],
                ]
            )

            content = [
                {
                    "type": "input_image",
                    "image_url": f"data:image/jpeg;base64,{encode_image(str(instructions['parts']))}",
                }
            ]
            messages.append({"role": "user", "content": content})
            messages_text.append(["user", str(instructions["parts"])])
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    return messages, messages_text


def call_function_anthropic(
    args, example, template_components, instructions, response
) -> list:
    content, content_text = [], ""

    tool_name = response.name
    tool_args = response.input

    match tool_name:
        case "sample_frame":
            # ad-hoc process
            if isinstance(tool_args["time_range"], str):
                splits = [
                    x.strip() for x in tool_args["time_range"].strip("[]").split(",")
                ]
                tool_args["time_range"] = splits

            # function call
            filepaths_and_ids, fps = tool_sample_frame(args, example, tool_args)  # todo

            # process
            for filepath, idx in filepaths_and_ids:
                content.append(
                    {
                        "type": "text",
                        "text": f"Frame {idx}",
                    }
                )
                extension = Path(filepath).suffix.replace(".", "").lower()
                content.append(
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": f"image/{extension}",
                            "data": encode_image(filepath),
                        },
                    }
                )
                content_text += f"Frame {idx}: <{filepath}>\n"

            if fps == 1:
                template = template_components["tools"]["fps_info"]
            else:
                template = (
                    template_components["tools"]["change_of_fps"]
                    .replace("{max_frames}", str(args.max_frames))
                    .replace("{fps}", f"{fps:.1f}")
                )
            # add fps-related info
            content.append({"type": "text", "text": template})
            content_text += template + "\n"
        case "zoom_in":
            # function call
            filepath = tool_zoom_in(args, example, tool_args)

            extension = Path(filepath).suffix.replace(".", "").lower()
            content.append(
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": f"image/{extension}",
                        "data": encode_image(filepath),
                    },
                }
            )
            content_text += f"<{filepath}>"
        case "check_instruction":
            if tool_args["mode"] == "text":
                content.append({"type": "text", "text": instructions["dot"]})
                content_text += f"<{instructions['dot']}>\n"
            elif tool_args["mode"] == "image":
                extension = Path(instructions["dag"]).suffix.replace(".", "").lower()
                content.append(
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": f"image/{extension}",
                            "data": encode_image(instructions["dag"]),
                        },
                    }
                )
                content_text += f"<{instructions['dag']}>\n"
            else:
                logging.error(f"Undefined mode: {tool_args['mode']}")
        case "check_final_picture":
            extension = Path(instructions["parts"]).suffix.replace(".", "").lower()
            content.append(
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": f"image/{extension}",
                        "data": encode_image(instructions["parts"]),
                    },
                }
            )
            content_text += f"<{instructions['parts']}>\n"
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    return content, content_text


def call_function_google(
    args, example, template_components, instructions, response
) -> list:
    client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

    parts, parts_text, uploaded_files = [], [], []

    tool_name = response.name
    tool_args = response.args

    match tool_name:
        case "sample_frame":
            # function call
            filepaths_and_ids, fps = tool_sample_frame(args, example, tool_args)  # todo

            # process
            sampled_image_filepaths_text = ""
            for filepath, idx in filepaths_and_ids:
                text_part = types.Part.from_function_response(
                    name=tool_name,
                    response={"result": f"Frame {idx}"},
                )
                parts.append(text_part)

                file = client.files.upload(file=filepath)
                part_from_uri = types.Part.from_uri(
                    file_uri=file.uri,
                    mime_type=file.mime_type,
                )
                part_from_function_response = types.Part.from_function_response(
                    name=tool_name,
                    response={"result": part_from_uri},
                )
                parts.append(part_from_function_response)
                uploaded_files.append(file)

                sampled_image_filepaths_text += f"Frame {idx}: <{filepath}>\n"
            parts_text.append(sampled_image_filepaths_text)

            if fps == 1:
                template = template_components["tools"]["fps_info"]
            else:
                template = (
                    template_components["tools"]["change_of_fps"]
                    .replace("{max_frames}", str(args.max_frames))
                    .replace("{fps}", f"{fps:.1f}")
                )
            # add fps-related info
            parts.append(types.Part(text=template))
            parts_text.append(template)

        case "zoom_in":
            # function call
            filepath = tool_zoom_in(args, example, tool_args)

            # convert uploaded file to Part (uri) then Part (function_response)
            file_uploaded = client.files.upload(file=filepath)
            part_from_uri = types.Part.from_uri(
                file_uri=file_uploaded.uri,
                mime_type=file_uploaded.mime_type,
            )
            part_from_function_response = types.Part.from_function_response(
                name=tool_name,
                response={"result": part_from_uri},
            )
            parts.append(part_from_function_response)
            uploaded_files.append(file_uploaded)
            parts_text.append(f"<{filepath}>")

            logging.info(f"debug {filepath=}")

        case "check_instruction":
            if tool_args["mode"] == "text":
                part_from_function_response = types.Part.from_function_response(
                    name=tool_name,
                    response={"result": instructions["dot"]},
                )
                parts.append(part_from_function_response)
                parts_text.append(["function_response", instructions["dot"]])
            elif tool_args["mode"] == "image":
                file_uploaded = client.files.upload(file=instructions["dag"])
                part_from_uri = types.Part.from_uri(
                    file_uri=file_uploaded.uri,
                    mime_type=file_uploaded.mime_type,
                )
                part_from_function_response = types.Part.from_function_response(
                    name=tool_name,
                    response={"result": part_from_uri},
                )
                parts.append(part_from_function_response)
                uploaded_files.append(file_uploaded)
                parts_text.append(f"<{str(instructions['dag'])}>")
            else:
                logging.error(f"Undefined mode: {tool_args['mode']}")
        case "check_final_picture":
            file_uploaded = client.files.upload(file=instructions["parts"])
            part_from_uri = types.Part.from_uri(
                file_uri=file_uploaded.uri,
                mime_type=file_uploaded.mime_type,
            )
            part_from_function_response = types.Part.from_function_response(
                name=tool_name,
                response={"result": part_from_uri},
            )
            parts.append(part_from_function_response)
            uploaded_files.append(file_uploaded)
            parts_text.append(f"<{str(instructions['parts'])}>")
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    return parts, parts_text, uploaded_files


def call_function_qwen(
    args, example, template_components, instructions, response
) -> list:
    message, message_text = None, None

    tool_name = response.name
    tool_args = json.loads(response.arguments)
    # tool_call_id = response.id

    match tool_name:
        case "sample_frame":
            # call function
            content, filepaths_text = [], ""
            filepaths_and_ids, fps = tool_sample_frame(args, example, tool_args)  # todo
            # put video then list of frame text
            for filepath, idx in filepaths_and_ids:
                content.append({"type": "text", "text": f"Frame {idx}"})
                file_data = f"data:image/jpeg;base64,{encode_image(filepath)}"
                content.append({"type": "image_url", "image_url": {"url": file_data}})
                filepaths_text += f"Frame {idx}: <{filepath}>\n"

            # add fps related info
            if fps == 1:
                template = template_components["tools"]["fps_info"]
            else:
                template = (
                    template_components["tools"]["change_of_fps"]
                    .replace("{max_frames}", str(args.max_frames))
                    .replace("{fps}", f"{fps:.1f}")
                )
            content.append({"type": "text", "text": template})
            filepaths_text += template + "\n"

            # add function_output as user's message because of images
            message = {"role": "function", "content": content}
            message_text = ["function", filepaths_text]

        case "zoom_in":
            filepath = tool_zoom_in(args, example, tool_args)
            if filepath.startswith("Coordinates"):
                content = [{"type": "text", "text": filepath}]
            else:
                file_data = f"data:image/jpeg;base64,{encode_image(filepath)}"
                content = [{"type": "image_url", "image_url": {"url": file_data}}]

            message = {"role": "function", "content": content}
            message_text = ["function", f"<{filepath}>"]

        case "check_instruction":
            if tool_args["mode"] == "text":
                content = template_components["tools"]["check_instruction"][
                    "text"
                ].replace("{graph}", instructions["dot"])
                message = {"role": "function", "content": content}
                message_text = ["function", content]

            elif tool_args["mode"] == "image":
                file_data = f"data:image/jpeg;base64,{encode_image(instructions['dag'])}"
                content = [{"type": "image_url", "image_url": {"url": file_data}}]

                message = {"role": "function", "content": content}
                message_text = ["function", f"<{str(instructions['dag'])}>"]
            else:
                logging.error(f"Undefined mode: {tool_args['mode']}")
        case "check_final_picture":
            file_data = f"data:image/jpeg;base64,{encode_image(instructions['parts'])}"
            content = [{"type": "image_url", "image_url": {"url": file_data}}]
            message = {"role": "function", "content": content}
            message_text = ["function", f"<{str(instructions['parts'])}>"]
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    return message, message_text


def call_function_qwen_user_for_image(
    args, example, template_components, instructions, response
) -> list:
    """
    Include images from functions under users.

    """
    messages, messages_text = [], []

    tool_name = response.name
    tool_args = json.loads(response.arguments)
    # tool_call_id = response.id

    match tool_name:
        case "sample_frame":
            content_function = ""

            # call function
            filepaths_and_ids, fps = tool_sample_frame(args, example, tool_args)  # todo

            # add fps related info
            if fps == 1:
                content_function += template_components["tools"]["fps_info"] + "\n"
            else:
                content_function += (
                    template_components["tools"]["change_of_fps"]
                    .replace("{max_frames}", str(args.max_frames))
                    .replace("{fps}", f"{fps:.1f}")
                ) + "\n"
            # add user-action intro message
            content_function += template_components["tools"]["sample_frame"]

            messages.append({"role": "function", "content": content_function})
            messages_text.append(["function", content_function])

            content, filepaths_text = [], ""
            # put video then list of frame text
            for filepath, idx in filepaths_and_ids:
                content.append({"type": "text", "text": f"Frame {idx}"})
                file_data = f"data:image/jpeg;base64,{encode_image(filepath)}"
                content.append({"type": "image_url", "image_url": {"url": file_data}})
                filepaths_text += f"Frame {idx}: <{filepath}>\n"

            # add function_output as user's message because of images
            messages.append({"role": "user", "content": content})
            messages_text.append(["user", filepaths_text])

        case "zoom_in":
            content_intro = template_components["tools"]["zoom_in"]
            messages.append({"role": "function", "content": content_intro})
            messages_text.append(["function", content_intro])

            filepath = tool_zoom_in(args, example, tool_args)
            if filepath.startswith("Coordinates"):
                content = [{"type": "text", "text": filepath}]
            else:
                file_data = f"data:image/jpeg;base64,{encode_image(filepath)}"
                content = [{"type": "image_url", "image_url": {"url": file_data}}]

            messages.append({"role": "user", "content": content})
            messages_text.append(["user", f"<{filepath}>"])

        case "check_instruction":
            if tool_args["mode"] == "text":
                content = template_components["tools"]["check_instruction"][
                    "text"
                ].replace("{graph}", instructions["dot"])
                messages.append({"role": "function", "content": content})
                messages_text.append(["function", content])

            elif tool_args["mode"] == "image":
                content_intro = template_components["tools"]["check_instruction"]["image"]
                messages.append({"role": "function", "content": content_intro})
                messages_text.append(["function", content_intro])

                file_data = f"data:image/jpeg;base64,{encode_image(instructions['dag'])}"
                content = [{"type": "image_url", "image_url": {"url": file_data}}]

                messages.append({"role": "user", "content": content})
                messages_text.append(["user", f"<{str(instructions['dag'])}>"])
            else:
                logging.error(f"Undefined mode: {tool_args['mode']}")
        case "check_final_picture":
            content_intro = template_components["tools"]["check_final_picture"]
            messages.append({"role": "function", "content": content_intro})
            messages_text.append(["function", content_intro])

            file_data = f"data:image/jpeg;base64,{encode_image(instructions['parts'])}"
            content = [{"type": "image_url", "image_url": {"url": file_data}}]
            messages.append({"role": "user", "content": content})
            messages_text.append(["user", f"<{str(instructions['parts'])}>"])
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    return messages, messages_text


def call_function_mimo(
    args, example, template_components, instructions, response
) -> list:
    message, message_text = None, None

    tool_name = response.name
    tool_args = json.loads(response.arguments)
    # tool_call_id = response.id

    match tool_name:
        case "sample_frame":
            # call function
            content, filepaths_text = [], ""
            filepaths_and_ids, fps = tool_sample_frame(args, example, tool_args)  # todo
            # put video then list of frame text
            for filepath, idx in filepaths_and_ids:
                content.append({"type": "text", "text": f"Frame {idx}"})
                file_data = f"data:image/jpeg;base64,{encode_image(filepath)}"
                content.append({"type": "image_url", "image_url": {"url": file_data}})
                filepaths_text += f"Frame {idx}: <{filepath}>\n"

            # add fps related info
            if fps == 1:
                template = template_components["tools"]["fps_info"]
            else:
                template = (
                    template_components["tools"]["change_of_fps"]
                    .replace("{max_frames}", str(args.max_frames))
                    .replace("{fps}", f"{fps:.1f}")
                )
            content.append({"type": "text", "text": template})
            filepaths_text += template + "\n"

            # add function_output as user's message because of images
            message = {"role": "tool", "content": content}
            message_text = ["tool", filepaths_text]

        case "zoom_in":
            filepath = tool_zoom_in(args, example, tool_args)
            if filepath.startswith("Coordinates"):
                content = [{"type": "text", "text": filepath}]
            else:
                file_data = f"data:image/jpeg;base64,{encode_image(filepath)}"
                content = [{"type": "image_url", "image_url": {"url": file_data}}]

            message = {"role": "tool", "content": content}
            message_text = ["tool", f"<{filepath}>"]

        case "check_instruction":
            if tool_args["mode"] == "text":
                content = template_components["tools"]["check_instruction"][
                    "text"
                ].replace("{graph}", instructions["dot"])
                message = {"role": "tool", "content": content}
                message_text = ["tool", content]

            elif tool_args["mode"] == "image":
                file_data = f"data:image/jpeg;base64,{encode_image(instructions['dag'])}"
                content = [{"type": "image_url", "image_url": {"url": file_data}}]

                message = {"role": "tool", "content": content}
                message_text = ["tool", f"<{str(instructions['dag'])}>"]
            else:
                logging.error(f"Undefined mode: {tool_args['mode']}")
        case "check_final_picture":
            file_data = f"data:image/jpeg;base64,{encode_image(instructions['parts'])}"
            content = [{"type": "image_url", "image_url": {"url": file_data}}]
            message = {"role": "tool", "content": content}
            message_text = ["tool", f"<{str(instructions['parts'])}>"]
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    return message, message_text


def call_function(args, example, template_components, instructions, response):
    """Dispatch a tool call to the handler of the active model family.

    Args:
        args: Run configuration; ``args.model_id`` selects the handler.
        example: Current example, forwarded to the handler.
        template_components: Message templates, forwarded to the handler.
        instructions: Instruction resources, forwarded to the handler.
        response: Provider-specific function-call object, forwarded as is.

    Returns:
        A tuple ``(output, output_text, uploaded_files)``. The first two
        values come from the handler; ``uploaded_files`` is filled only by
        the Google handler and is an empty list otherwise.

    The process exits via ``sys.exit`` when ``args.model_id`` belongs to no
    known model family.
    """
    model_id = args.model_id
    handler_args = (args, example, template_components, instructions, response)
    uploaded_files = []

    if model_id in OPENAI_MODELS:
        output, output_text = call_function_openai(*handler_args)
    elif model_id in ANTHROPIC_MODELS:
        output, output_text = call_function_anthropic(*handler_args)
    elif model_id in GOOGLE_MODELS:
        # Only this handler reports the files it uploaded.
        output, output_text, uploaded_files = call_function_google(*handler_args)
    elif model_id in QWEN_MODELS:
        # call_function_qwen_user_for_image was tried here and did not
        # work better, so the single-message variant is used.
        output, output_text = call_function_qwen(*handler_args)
    elif model_id in MIMO_MODELS + INTERNVL_MODELS:
        output, output_text = call_function_mimo(*handler_args)
    else:
        sys.exit(f"Undefined (call function) {args.model_id}")

    return output, output_text, uploaded_files
