# -*- coding:utf-8 -*-

"""
agent calling methods
"""

import requests
import json
import uuid
import logging
import base64
import numpy as np
import pandas as pd
import struct
import re
import time
from openai import OpenAI
import faiss


# Module-level prompt strings. Both are empty in this file.
# NOTE(review): presumably the system-role and user-role prompt templates for
# the LLM calls below -- no function visible here reads them; confirm usage.
SYSTEM = """"""

USER = """"""





def llm_qwen32b(user_input, instruction='', stream=False):
    """
    Placeholder for the internal qwen32b service used for tasks such as
    intention detection or query rewriting.

    The body here is a stub: no request is made, none of the arguments are
    read, and the result is always ``None``.

    :param user_input: user query to send to the model
    :param instruction: instruction (system prompt) describing the current task
    :param stream: whether the output should be produced in stream format
    :return: model output (always ``None`` in this stub)
    """
    return

def call_qwen_messages(model, messages):
    """
    Placeholder for the internal service that sends chat messages to a qwen
    model for scoring tasks.

    The body here is a stub: no request is made, the arguments are not read,
    and the result is always ``None``.

    :param model: name of the model to call
    :param messages: input messages for the model
    :return: model output (always ``None`` in this stub)
    """
    return

def call_llm_messages(model, messages):
    """
    Placeholder for the internal service that sends chat messages to a large
    LLM for scoring tasks.

    The body here is a stub: no request is made, the arguments are not read,
    and the result is always ``None``.

    :param model: name of the model to call
    :param messages: input messages for the model
    :return: model output (always ``None`` in this stub)
    """
    return

def call_intention_service(query, history):
    """
    Ask the qwen32b service (``llm_qwen32b``) for the intention of a query.

    The prior turns and the current query are rendered into a single prompt
    and sent together with an (empty) system prompt. The reply is parsed as a
    Python literal and its ``'intention'`` entry is returned.

    :param query: current user query (string)
    :param history: sequence of ``(user_text, assistant_text)`` pairs for the
        earlier turns; an empty or ``None`` history means "no previous turns"
    :return: the value stored under ``'intention'`` when the reply parses to a
        mapping containing that key, otherwise the raw reply unchanged
    """
    # Local import so the block stays self-contained; only used for parsing.
    import ast

    system_prompt = """"""
    if history:
        # One "user/assistant" pair per turn, each terminated by a newline so
        # consecutive turns do not run together in the prompt.
        turns = "".join(
            "user：{}\nassistant：{}\n".format(q, a) for q, a in history
        )
        conv = 'Historical dialogue：\n{0}\nCurrent Issues：\n{1}'.format(turns, query)
    else:
        conv = "Historical dialogue：\nCurrent Issues：" + query

    intention_pt = llm_qwen32b(conv, system_prompt)
    try:
        # NOTE(security): the reply is model-generated text influenced by the
        # user query, so it must not be passed to eval(). literal_eval accepts
        # only plain literals (dict/list/str/number/...), which still covers a
        # reply shaped like {'intention': ...}.
        intention = ast.literal_eval(intention_pt)['intention']
    except Exception:
        # Deliberate best-effort: any unparsable or unexpected reply
        # (including None) is handed back to the caller as-is.
        intention = intention_pt
    return intention


def call_memory_retrieval(query, memory_json_path, memory_faiss_path,
                          topn=3, similarity_threshold=0.8):
    """
    Recall stored memories that are similar to a query.

    The query is embedded, the FAISS index at ``memory_faiss_path`` is searched
    for the ``topn`` nearest entries, and every hit whose score is at least
    ``similarity_threshold`` is looked up by position in the JSON file at
    ``memory_json_path``.

    NOTE(review): keeping hits with ``score >= similarity_threshold`` assumes
    the index reports a similarity where larger means closer (e.g. inner
    product) -- confirm against how the index was built.

    :param query: query text to search with
    :param memory_json_path: path of the UTF-8 JSON file holding the memories;
        presumably a list of dicts aligned with the index rows -- verify
    :param memory_faiss_path: path of the serialized FAISS index
    :param topn: number of nearest neighbours requested from the index
    :param similarity_threshold: minimum score a neighbour needs to be kept
    :return: list of shallow copies of the matching memory entries, each with
        an added ``'Index'`` key holding its position as a plain ``int``;
        an empty list when nothing passes the threshold
    """
    def trion20_onnx_embedding_clip(querys):
        """
        a internal service that convert query into embedding
        :param querys: input query
        :return: embedding of the query
        """
        # Stub in this file: returns None. The caller below expects the real
        # implementation to return an array-like object exposing ``.astype``.
        return None

    # use service to get embedding of the query
    input_embeddings = trion20_onnx_embedding_clip([query]).astype('float32')

    # load index from faiss and search it
    loaded_index = faiss.read_index(memory_faiss_path)
    scores, indices = loaded_index.search(input_embeddings, topn)

    # FAISS fills missing neighbours with label -1; a negative label must never
    # reach the list lookup below, where it would silently select an entry
    # counted from the end. int() also turns numpy integers into plain ints.
    filtered_indices = [
        int(idx)
        for idx, score in zip(indices[0], scores[0])
        if idx >= 0 and score >= similarity_threshold
    ]
    if not filtered_indices:
        print('There is no recalled memory.')
        return []

    # load memory from json (only needed once there is at least one hit)
    with open(memory_json_path, "r", encoding="utf-8") as f:
        loaded_json = json.load(f)

    recalled_memories = []
    for idx in filtered_indices:
        entry = loaded_json[idx].copy()  # copy so the loaded data is not mutated
        entry['Index'] = idx
        recalled_memories.append(entry)
    return recalled_memories

