"""
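Text-oriented tools: image-based tool results (sampled frames, a zoomed-in
image, the final picture) are captioned by a separate model call, and the
resulting text is returned as the tool-call output in the message format of
each supported model provider. ``check_instruction`` returns the instruction
graph text directly, without a caption call.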
"""

import logging
import json
import sys
import os
from collections import defaultdict

from utils import (
    sample_frame,
    convert_time,
    convert_angle,
    get_fps,
)

from utils_api import (
    encode_image,
    OPENAI_MODELS,
    # ANTHROPIC_MODELS,
    GOOGLE_MODELS,
    QWEN_MODELS,
    MIMO_MODELS,
)

# from utils import (
#     # convert_time,
#     # convert_angle,
#     # sample_frame,
#     # get_fps,
# )
from tools import (
    # tool_sample_frame,
    tool_zoom_in,
)
from utils_api import (
    call_api_single,
)

from google import genai
from google.genai import types


def tool_sample_frame(args, example, tool_args):
    """
    Sample frames of one camera angle inside a requested time window.

    The window is read from ``tool_args["start_time"]`` and
    ``tool_args["end_time"]``, converted with
    ``convert_time(..., "string2second")`` and shifted by
    ``example["video"]["start"]``. Frames are looked up under
    ``args.dirpath_frame / example["sequence_id"] / <angle directory>``,
    where the angle directory is ``convert_angle(tool_args["angle"],
    "string2file")``. ``args.max_frames`` is forwarded to ``sample_frame``
    and ``get_fps``.

    Returns:
        ``(filepaths_and_ids, fps)``. ``filepaths_and_ids`` is a list of
        ``[filepath_as_str, frame_id]`` pairs, where ``frame_id`` is the
        integer file stem minus ``int(example["video"]["start"])``,
        zero-padded to four digits. ``fps`` is the value returned by
        ``get_fps``.

    Raises:
        Any exception raised while computing the time window is logged
        together with ``tool_args`` and then re-raised.
    """

    try:
        start = (
            convert_time(tool_args["start_time"], "string2second")
            + example["video"]["start"]
        )
        end = (
            convert_time(tool_args["end_time"], "string2second")
            + example["video"]["start"]
        )
    except Exception:
        logging.exception("Error in tool_sample_frame")
        logging.error(f"{tool_args=}")
        # Without a valid window nothing below can work; swallowing the
        # error here used to surface later as an unrelated
        # UnboundLocalError on ``start``.
        raise

    dirpath = (
        args.dirpath_frame
        / example["sequence_id"]
        / convert_angle(tool_args["angle"], "string2file")
    )
    filepaths_sampled_frames = sample_frame(
        dirpath,
        start,
        end,
        args.max_frames,
    )
    fps = get_fps(dirpath, start, end, args.max_frames)

    # Frame ids are reported relative to the start of the example's video.
    video_start = int(example["video"]["start"])
    filepaths_and_ids = [
        [str(filepath_frame), f"{(int(filepath_frame.stem) - video_start):04d}"]
        for filepath_frame in filepaths_sampled_frames
    ]
    return filepaths_and_ids, fps


def call_function_openai(
    args, example, template_components, instructions, response
) -> list:
    record = []

    tool_name = response.name
    tool_args = json.loads(response.arguments)
    tool_call_id = response.call_id

    # prepare input
    content, content_text = [], ""
    match tool_name:
        case "sample_frame":
            content.append(
                {
                    "type": "input_text",
                    "text": template_components["caption"]["sample_frame"],
                }
            )
            filepaths_and_ids, fps = tool_sample_frame(args, example, tool_args)
            for filepath, idx in filepaths_and_ids:
                content.append({"type": "input_text", "text": f"Frame {idx}"})
                content.append(
                    {
                        "type": "input_image",
                        "image_url": f"data:image/jpeg;base64,{encode_image(filepath)}",
                    }
                )
                content_text += f"Frame {idx}: <{filepath}>\n"
        case "zoom_in":
            filepath = tool_zoom_in(args, example, tool_args)
            content = [
                {
                    "type": "input_text",
                    "text": template_components["caption"]["zoom_in"],
                },
                {
                    "type": "input_image",
                    "image_url": f"data:image/jpeg;base64,{encode_image(filepath)}",
                },
            ]
            content_text = (
                template_components["caption"]["zoom_in"] + f"\n{str(filepath)}"
            )
        case "check_instruction":
            pass
        case "check_final_picture":
            filepath = instructions["parts"]
            content = [
                {
                    "type": "input_text",
                    "text": template_components["caption"]["check_final_picture"],
                },
                {
                    "type": "input_image",
                    "image_url": f"data:image/jpeg;base64,{encode_image(filepath)}",
                },
            ]
            content_text = (
                template_components["caption"]["check_final_picture"]
                + f"\n{str(filepath)}"
            )
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    # caption
    caption, summary, usage = "", "", defaultdict(int)
    if content:
        messages_caption = [{"role": "user", "content": content}]
        record.append(["caption", content_text])
        response, usage = call_api_single(args, "", messages_caption)
        ## postprocess
        for _response in response.output:
            match _response.type:
                case "message":
                    caption += "\n".join([x.text for x in _response.content])
                case "reasoning":
                    summary += "\n".join([x.text for x in _response.summary])
                case _:
                    logging.error(f"Undefined {_response.type=}")
        record.append(["assistant", [summary, caption]])

    # return message
    messages = []
    match tool_name:
        case "sample_frame":
            template = template_components["tools"]["sample_frame"]
            output_text = template.replace("{caption}", caption)
            messages.append(
                {
                    "type": "function_call_output",
                    "call_id": tool_call_id,
                    "output": output_text,
                }
            )
            record.append(["function_call_output", output_text])
        case "zoom_in":
            template = template_components["tools"]["zoom_in"]
            output_text = template.replace("{caption}", caption)
            messages.append(
                {
                    "type": "function_call_output",
                    "call_id": tool_call_id,
                    "output": output_text,
                }
            )
            record.append(["function_call_output", output_text])
        case "check_instruction":
            template = template_components["tools"]["check_instruction"]["text"]
            output_text = template.replace("{graph}", instructions["dot"])
            messages.append(
                {
                    "type": "function_call_output",
                    "call_id": tool_call_id,
                    "output": output_text,
                }
            )
            record.append(["function_call_output", output_text])
        case "check_final_picture":
            template = template_components["tools"]["check_final_picture"]
            output_text = template.replace("{caption}", caption)
            messages.append(
                {
                    "type": "function_call_output",
                    "call_id": tool_call_id,
                    "output": output_text,
                }
            )
            record.append(["function_call_output", output_text])
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    return messages, record, usage


def call_function_google(
    args, example, template_components, instructions, response
) -> list:
    record = []

    client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

    tool_name = response.name
    tool_args = response.args
    # tool_call_id = response.call_id

    # prepare input
    parts_caption, parts_caption_text = [], ""
    uploaded_files = []
    match tool_name:
        case "sample_frame":
            template_sample = template_components["caption"]["sample_frame"]
            parts_caption.append(types.Part(text=template_sample))

            filepaths_and_ids, fps = tool_sample_frame(args, example, tool_args)
            for filepath, idx in filepaths_and_ids:
                parts_caption.append(types.Part(text=f"Frame {idx}"))
                file = client.files.upload(file=filepath)
                parts_caption.append(
                    types.Part.from_uri(
                        file_uri=file.uri,
                        mime_type=file.mime_type,
                    )
                )
                parts_caption_text += f"Frame {idx}: {filepath}\n"
                uploaded_files.append(file)
        case "zoom_in":
            template_zoom = template_components["caption"]["zoom_in"]
            parts_caption.append(types.Part(text=template_zoom))

            filepath = tool_zoom_in(args, example, tool_args)
            file = client.files.upload(file=filepath)
            parts_caption.append(
                types.Part.from_uri(
                    file_uri=file.uri,
                    mime_type=file.mime_type,
                )
            )
            parts_caption_text += f"{template_zoom} {str(filepath)}\n"
            uploaded_files.append(file)
        case "check_instruction":
            pass
        case "check_final_picture":
            template_pic = template_components["caption"]["check_final_picture"]
            parts_caption.append(types.Part(text=template_pic))

            filepath = instructions["parts"]
            file = client.files.upload(file=filepath)
            parts_caption.append(
                types.Part.from_uri(
                    file_uri=file.uri,
                    mime_type=file.mime_type,
                )
            )
            parts_caption_text += f"{template_pic} {str(filepath)}\n"
            uploaded_files.append(file)
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    # caption
    caption, summary, usage = "", "", defaultdict(int)
    if parts_caption:
        contents = [types.Content(role="user", parts=parts_caption)]
        record.append(["caption", parts_caption_text])
        response_caption, usage = call_api_single(args, "", contents)

        if response_caption.candidates:
            ## postprocess
            logging.info(f"debug: {response_caption=}")
            for part in response_caption.candidates[0].content.parts:
                if part.thought:
                    summary += part.text
                elif part.text:
                    caption += part.text
                else:
                    logging.error(f"Undefined {part=}")
            record.append(["assistant", [summary, caption]])
        else:
            caption = "No caption returned."
            record.append(["assistant", [summary, caption]])

    # return message
    match tool_name:
        case "sample_frame":
            template = template_components["tools"]["sample_frame"]
            output_text = template.replace("{caption}", caption)
        case "zoom_in":
            template = template_components["tools"]["zoom_in"]
            output_text = template.replace("{caption}", caption)
        case "check_instruction":
            template = template_components["tools"]["check_instruction"]["text"]
            output_text = template.replace("{graph}", instructions["dot"])
        case "check_final_picture":
            template = template_components["tools"]["check_final_picture"]
            output_text = template.replace("{caption}", caption)
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")
    parts_output = [
        types.Part.from_function_response(
            name=tool_name, response={"result": output_text}
        )
    ]
    record.append(["function_call_output", output_text])

    return parts_output, record, uploaded_files, usage


def call_function_qwen(
    args, example, template_components, instructions, response
) -> list:
    record = []

    tool_name = response.name
    tool_args = json.loads(response.arguments)
    # tool_call_id = response.id

    # prepare input
    content_caption, content_caption_text = [], ""
    match tool_name:
        case "sample_frame":
            content_caption.append(
                {
                    "type": "text",
                    "text": template_components["caption"]["sample_frame"],
                }
            )
            content_caption_text += template_components["caption"]["sample_frame"]

            filepaths_and_ids, fps = tool_sample_frame(args, example, tool_args)
            for filepath, idx in filepaths_and_ids:
                content_caption.append({"type": "text", "text": f"Frame {idx}"})
                file_data = f"data:image/jpeg;base64,{encode_image(filepath)}"
                content_caption.append(
                    {"type": "image_url", "image_url": {"url": file_data}}
                )
                content_caption_text += f"Frame {idx}: <{filepath}>\n"
        case "zoom_in":
            filepath = tool_zoom_in(args, example, tool_args)
            if filepath.startswith("Coordinates"):  # handling non-valid request
                content_caption.append({"type": "text", "text": filepath})
            else:
                content_caption.append(
                    {
                        "type": "text",
                        "text": template_components["caption"]["zoom_in"],
                    }
                )
                file_data = f"data:image/jpeg;base64,{encode_image(filepath)}"
                content_caption.append(
                    {"type": "image_url", "image_url": {"url": file_data}}
                )
                content_caption_text += (
                    template_components["caption"]["zoom_in"] + f"\n{str(filepath)}"
                )
        case "check_instruction":
            pass
        case "check_final_picture":
            content_caption.append(
                {
                    "type": "text",
                    "text": template_components["caption"]["check_final_picture"],
                }
            )
            file_data = f"data:image/jpeg;base64,{encode_image(instructions['parts'])}"
            content_caption.append({"type": "image_url", "image_url": {"url": file_data}})
            content_caption_text = (
                template_components["caption"]["check_final_picture"]
                + f"\n{str(instructions['parts'])}"
            )
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    # caption
    caption = ""
    if content_caption:
        messages_caption = [{"role": "user", "content": content_caption}]
        record.append(["caption-user", content_caption_text])
        response, usage = call_api_single(args, "", messages_caption)
        ## postprocess
        if len(response.choices) > 1:  # sanity check
            logging.warning("[Debug] Multiple choices/candidates in response")
        message = response.choices[0].message
        if message.content:
            caption = message.content
            record.append(["caption-assistant", message.content])

    messages = []
    match tool_name:
        case "sample_frame":
            template = template_components["tools"]["sample_frame"]
            output = template.replace("{caption}", caption)
        case "zoom_in":
            template = template_components["tools"]["zoom_in"]
            output = template.replace("{caption}", caption)
        case "check_instruction":
            template = template_components["tools"]["check_instruction"]["text"]
            output = template.replace("{graph}", instructions["dot"])
        case "check_final_picture":
            template = template_components["tools"]["check_final_picture"]
            output = template.replace("{caption}", caption)
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")
    messages.append({"role": "function", "content": output})
    record.append(["function", output])

    return messages, record


def call_function_mimo(
    args, example, template_components, instructions, response
) -> list:
    record = []

    tool_name = response.name
    tool_args = json.loads(response.arguments)
    # tool_call_id = response.id

    # prepare input
    content_caption, content_caption_text = [], ""
    match tool_name:
        case "sample_frame":
            content_caption.append(
                {
                    "type": "text",
                    "text": template_components["caption"]["sample_frame"],
                }
            )
            content_caption_text += template_components["caption"]["sample_frame"]

            filepaths_and_ids, fps = tool_sample_frame(args, example, tool_args)
            for filepath, idx in filepaths_and_ids:
                content_caption.append({"type": "text", "text": f"Frame {idx}"})
                file_data = f"data:image/jpeg;base64,{encode_image(filepath)}"
                content_caption.append(
                    {"type": "image_url", "image_url": {"url": file_data}}
                )
                content_caption_text += f"Frame {idx}: <{filepath}>\n"
        case "zoom_in":
            filepath = tool_zoom_in(args, example, tool_args)
            if filepath.startswith("Coordinates"):  # handling non-valid request
                content_caption.append({"type": "text", "text": filepath})
            else:
                content_caption.append(
                    {
                        "type": "text",
                        "text": template_components["caption"]["zoom_in"],
                    }
                )
                file_data = f"data:image/jpeg;base64,{encode_image(filepath)}"
                content_caption.append(
                    {"type": "image_url", "image_url": {"url": file_data}}
                )
                content_caption_text += (
                    template_components["caption"]["zoom_in"] + f"\n{str(filepath)}"
                )
        case "check_instruction":
            pass
        case "check_final_picture":
            content_caption.append(
                {
                    "type": "text",
                    "text": template_components["caption"]["check_final_picture"],
                }
            )
            file_data = f"data:image/jpeg;base64,{encode_image(instructions['parts'])}"
            content_caption.append({"type": "image_url", "image_url": {"url": file_data}})
            content_caption_text = (
                template_components["caption"]["check_final_picture"]
                + f"\n{str(instructions['parts'])}"
            )
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")

    # caption
    caption = ""
    if content_caption:
        messages_caption = [{"role": "user", "content": content_caption}]
        record.append(["caption-user", content_caption_text])
        response, usage = call_api_single(args, "", messages_caption)
        ## postprocess
        if len(response.choices) > 1:  # sanity check
            logging.warning("[Debug] Multiple choices/candidates in response")
        message = response.choices[0].message
        if message.content:
            caption = message.content
            record.append(["caption-assistant", message.content])

    messages = []
    match tool_name:
        case "sample_frame":
            template = template_components["tools"]["sample_frame"]
            output = template.replace("{caption}", caption)
        case "zoom_in":
            template = template_components["tools"]["zoom_in"]
            output = template.replace("{caption}", caption)
        case "check_instruction":
            template = template_components["tools"]["check_instruction"]["text"]
            output = template.replace("{graph}", instructions["dot"])
        case "check_final_picture":
            template = template_components["tools"]["check_final_picture"]
            output = template.replace("{caption}", caption)
        case _:
            logging.error(f"Undefined tool: {tool_name} {tool_args}")
    messages.append({"role": "tool", "content": output})
    record.append(["tool", output])

    return messages, record


def call_function(args, example, template_components, instructions, response):
    """
    Dispatch a tool call to the handler matching ``args.model_id``.

    The model id is checked, in order, against ``OPENAI_MODELS``,
    ``GOOGLE_MODELS``, ``QWEN_MODELS`` and ``MIMO_MODELS``; an id found in
    none of them terminates the process through ``sys.exit``.

    Returns:
        ``(output, output_text, uploaded_files, usage)``. ``uploaded_files``
        stays an empty list unless the Google handler ran, and ``usage``
        stays an empty ``defaultdict(int)`` for the Qwen and MiMo handlers,
        which do not return one.
    """
    # Defaults for the values that only some handlers provide.
    uploaded_files = []
    usage = defaultdict(int)

    model_id = args.model_id
    handler_args = (args, example, template_components, instructions, response)

    if model_id in OPENAI_MODELS:
        output, output_text, usage = call_function_openai(*handler_args)
    elif model_id in GOOGLE_MODELS:
        output, output_text, uploaded_files, usage = call_function_google(
            *handler_args
        )
    elif model_id in QWEN_MODELS:
        output, output_text = call_function_qwen(*handler_args)
    elif model_id in MIMO_MODELS:
        output, output_text = call_function_mimo(*handler_args)
    else:
        sys.exit(f"Undefined (call function) {args.model_id}")

    return output, output_text, uploaded_files, usage
