import json
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from collections import Counter
import openai
import os
from openai import OpenAI
import re

# Return the tool combinations as sentences of the form "Tool: tool description".
def getToolCombinations(file_name, tool_descriptions):
    """Build one descriptive sentence per distinct tool combination in a JSON file.

    The file must contain a JSON array of objects that each carry a ``"tool"``
    list of tool names. Combinations are compared without regard to order, so
    ``["a", "b"]`` and ``["b", "a"]`` are counted once.

    Args:
        file_name: Path of the JSON file to read.
        tool_descriptions: Mapping from tool name to its description.

    Returns:
        A list of strings, one per unique combination, each of the form
        ``"tool1: description1, tool2: description2"``. Tools within a
        sentence and the sentences themselves are in sorted order, so the
        result is identical on every run.

    Raises:
        KeyError: If an item has no ``"tool"`` key or a tool has no entry in
            ``tool_descriptions``.
    """
    with open(file_name, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Sorting each tool list makes the comparison order-insensitive; the set
    # drops duplicate combinations.
    tool_combinations = {tuple(sorted(item['tool'])) for item in data}

    # Walk the set in sorted order: plain set iteration over strings can differ
    # from one interpreter run to the next, which made the output order unstable.
    return [
        ", ".join(f"{tool}: {tool_descriptions[tool]}" for tool in combination)
        for combination in sorted(tool_combinations)
    ]

def testAndtrain(groundTruth_file_name):
    with open(groundTruth_file_name, 'r') as f:
        groundTruth = json.load(f)
    queries_test = []
    tools_test = []

    # Iterate through each item in the JSON data
    test_size = int(len(groundTruth) / 5)
    for item in groundTruth[:test_size]:
        queries_test.append(item['query'])
        tools_test.append(item['tool'])

    queries_train = []
    tools_train = []
    query_tool_dict_train = {}

    # Process each item in the JSON data
    for item in groundTruth[test_size:]:
        # Extract the content after "query" and remove the surrounding quotes
        query = item["query"].strip(':')
        queries_train.append(query)

        # Extract the content after "tool" and add the list to tools
        query_tool_list = item["tool"]
        tools_train.append(query_tool_list)

        # Populate the dictionary
        query_tool_dict_train[query] = query_tool_list
    return queries_test, tools_test, queries_train, tools_train, query_tool_dict_train

# Return two items: a list of "tool: description" strings and a dictionary mapping each tool to its description.
def toolDescription(toolDescription_file_name):
    """Load the tool descriptions from a JSON object file.

    Returns:
        A pair ``(tool_list, tool_dict)``: ``tool_list`` holds one
        ``"name: description"`` string per entry, in file order, and
        ``tool_dict`` maps each name to its description.
    """
    with open(toolDescription_file_name, 'r') as handle:
        descriptions = json.load(handle)

    tool_dict = dict(descriptions.items())
    tool_list = [f"{name}: {text}" for name, text in tool_dict.items()]
    return tool_list, tool_dict

def unique_tool(queries, query_tool_dict_train):
    """Collect the distinct tools used by the given queries.

    Args:
        queries: Iterable of query strings; each must be a key of
            ``query_tool_dict_train``.
        query_tool_dict_train: Mapping from query to its list of tools.

    Returns:
        A list of the tools with duplicates removed, in order of first
        appearance.

    Raises:
        KeyError: If a query is not present in ``query_tool_dict_train``.
    """
    # A dict is used as an ordered set: the previous list(set(...)) returned
    # the tools in an arbitrary order that could change between runs.
    seen = {}
    for query in queries:
        seen.update(dict.fromkeys(query_tool_dict_train[query]))
    return list(seen)


def find_toolCandidate(queriesTrain, tool_list, queriesTest, query_tool_dict_train, top_k=7):
    """Rank candidate tools for every test query by how often they are suggested.

    For each test query, tool names are gathered from three similarity
    searches and ordered by how many times they occur in total:

    1. the tool entries most similar to the query,
    2. the tools recorded for the training queries most similar to the query,
    3. the tool entries most similar to the best match from step 1.

    Args:
        queriesTrain: Training query strings to index.
        tool_list: Tool entries of the form ``"name: description"``.
        queriesTest: Queries to find candidates for.
        query_tool_dict_train: Mapping from training query to its tool list.
        top_k: Number of neighbours requested from each similarity search
            (defaults to 7, the value that used to be hard-coded).

    Returns:
        One list per test query holding the candidate tool names from most to
        least frequently suggested.
    """
    embedding_model = HuggingFaceEmbeddings()  # original author's note: vector length 768
    vectordb_QueryTrain = FAISS.from_texts(texts=queriesTrain, embedding=embedding_model)
    vectordb_Tool = FAISS.from_texts(texts=tool_list, embedding=embedding_model)

    def tool_names(documents):
        # Entries look like "name: description"; keep the text before the first ':'.
        return [document.page_content.split(":")[0] for document in documents]

    potential_tool_list = []
    for query in queriesTest:
        # Source 1: tools whose entry is closest to the query itself.
        tool_hits = vectordb_Tool.similarity_search(query, top_k)
        names_from_query = tool_names(tool_hits)
        top_tool = tool_hits[0].page_content

        # Source 2: tools used by the most similar training queries.
        similar_queries = [
            document.page_content
            for document in vectordb_QueryTrain.similarity_search(query, top_k)
        ]
        names_from_history = unique_tool(similar_queries, query_tool_dict_train)

        # Source 3: tools whose entry is closest to the best-matching tool.
        names_from_neighbours = tool_names(vectordb_Tool.similarity_search(top_tool, top_k))

        # A tool suggested by several sources ranks higher.
        votes = Counter(names_from_query + names_from_history + names_from_neighbours)
        potential_tool_list.append([name for name, _ in votes.most_common()])
    return potential_tool_list

"""
def answer_AI(question, tool):
    os.environ["OPENAI_API_KEY"] = "sk-proj-yqfdABXy181oLK5yF1BZT3BlbkFJm9FMBg1aWZ9RkzqRfXNJ"
    openai.api_key = os.environ["OPENAI_API_KEY"]
    client = OpenAI()
    completion = client.chat.completions.create(
        model="gpt-4o",
        # fmt: off
        messages=[
            {"role": "user", "content": f"""Give a question and a tool in the format Question -> …. Tool -> …. First, you need to decide how many sub-question it contains. Then, you need to decide how this tool can help finish this question. Your answer should be one of the following three patterns. First pattern: you think the given tool can solve this problem once and for all. You answer should be started with yes and give a brief reason about how it can solve it. Second pattern: You think the given tool is not useful for solving the problem at all, i.e. it does not solve any small aspect of the problem. Your answer should be started with No, and give a brief reason about why it doesn't help solve the problem; Third pattern: You think this tool can only solve part of the problem, but it cannot completely solve the problem, and other tools are needed. Your answer should be started with piece and you should give a brief illustration about which part of the problem does this tool solve?.Next, since the original problem has been partially solved, just write down the remaining problems that need to be solved, and present them in the form of questions, not in the form of narratives (The answer of this part start with word Remain:). Finally, Regarding the unresolved issues, write your answer about do you need to use the results of the current tool, or can you solve them independently with other tools (That is, when other tools are used later to solve the remaining problems, do we need to use the execution results of the current tool)? If you think need to use them, then answer S and give a brief reason; if you don't need to use them, then answer I and give a brief reason. Remember, your job is to determine whether the current tool can solve the problem directly raised. You don’t need to do too much in-depth thinking or divergent thinking. Just look at the most superficial problems. Here is some example about different patterns(The answer of this part start with Seq:):

        (The first pattern)
        Question->Can you provide me with SEO Insights? Tool->SEOTool: Tool that provides users with SEO analytics content.
        Two subquestion.
        Yes. the SEOTool can solve this problem once and for all. It provides users with SEO analytics content, which directly answers the question of providing SEO insights.

        (The second pattern)
        Question->I need to know the time in Sydney, Australia. Can you help? Tool-> Discount: Discover discounts and coupon codes to save money on products.
        One question.
        No, the Discount tool does not help solve the problem because it is designed to discover discounts and coupon codes for products, which is unrelated to checking the time in Sydney, Australia.

        (The third pattern)
        Question->Can I find academic research papers on this topic? Tool-> ResearchHelper: Tool that offers additional functions beyond searching academic papers, such as generating mind maps, answering user questions and storing them in specific formats.
        Two subquestions.
        Piece. the ResearchHelper tool can assist in finding academic research papers by searching databases and repositories; however, it cannot guarantee that the search will yield all relevant or most recent papers on the topic. Moreover, additional tools may be needed for evaluating the relevance and credibility of the papers found. 
        Remain: How to determine the relevance of the papers found and how to ensure that the papers are the most recent and credible? (For this part, it doesn't need to be two questions necessary, the question can be 1,2,3...) 
        Seq: S. The results of the ResearchHelper tool are needed to proceed with relevance and credibility checks since these checks depend on the papers initially found.

        Query: Question-> {question}. Can you provide that information? Tool-> {tool}"""}
        ]

    )
    print(completion.choices[0].message.content)
    return completion.choices[0].message.content




def answer_AI_bundle(question, tool):
    tool_str = str(tool)
    os.environ["OPENAI_API_KEY"] = "sk-proj-yqfdABXy181oLK5yF1BZT3BlbkFJm9FMBg1aWZ9RkzqRfXNJ"
    openai.api_key = os.environ["OPENAI_API_KEY"]
    client = OpenAI()
    completion = client.chat.completions.create(
        model="gpt-4o",
        # fmt: off
        messages=[
            {"role": "user", "content": f"""Give a question and a list of tools in the format Question: …, Tool: …, you need to decide how these tools can help finish this question. Your answer should be one of the following three patterns. First pattern: you think the givens tool can solve this problem once and for all. You answer should be started with yes and give a brief reason about how it can solve it. Second pattern: You think the given tools are not useful for solving the problem at all, i.e. it does not solve any small aspect of the problem. Your answer should be started with No, and give a brief reason about why it doesn't help solve the problem; Third pattern: You think these tools can only solve part of the problem, but it cannot completely solve the problem, and other tools are needed. In this pattern, there will be two situations: First situation, all the given tools can be helpful to help solve some parts problem, can contribute in some way, just can't be completely solved. Second situation, for the give tools, only some of those tools can be useful, the rest is useless. For the first siutation in third pattern, your answer should be started with Piece1; else, you answer should be started with Piece2. Then you should give a brief illustration about which part of the problem does these tools solve and which tools are used?. Also, since the original problem has been partially solved, just write down the remaining problems that need to be solved, and present them in the form of questions, not in the form of narratives. Here is some example about different patterns:

(The first pattern)
Question: I want to know the latest news about Tesla and how it has impacted the stock market. Tool: [FinanceTool: Stay informed with the latest financial updates, real-time insights, and analysis on a wide range of options, stocks, cryptocurrencies, and more; NewsTool": "Stay connected to global events with our up-to-date news around the world.]

Yes. The given tools can solve this problem once and for all. The NewsTool can provide you with the latest news about Tesla, while the FinanceTool can give insights into how that news has impacted Tesla's stock in the market. Together, they cover both aspects of the question comprehensively.

(The second pattern)
Question: I want to know the latest news about Tesla and how it has impacted the stock market. Tool: [GiftTool: Provide suggestions for gift selection.; StrologyTool: Povides strology services for you.]

No. The given tools are not useful for solving the problem. GiftTool is meant for gift selection, and StrologyTool offers astrology services, neither of which helps in finding the latest news about Tesla or understanding its impact on the stock market.

(The third pattern)
Question: I want to know the latest news about Tesla and how it has impacted the stock market. Tool: [FinanceTool: Stay informed with the latest financial updates, real-time insights, and analysis on a wide range of options, stocks, cryptocurrencies, and more; StrologyTool: Povides strology services for you.]

Piece1. The FinanceTool can help solve part of the problem by providing insights into how the latest news about Tesla has impacted the stock market. However, the StrologyTool is not relevant and does not contribute to solving the problem.
Remaining problems:
•	What is the latest news about Tesla?


        Question: {question}. Tool: {tool_str}"""}
        ]

    )
    return completion.choices[0].message.content


def check_pattern(input_string):
    if "Yes" in input_string:
        return ["Yes"]
    elif "No" in input_string:
        return ["No"]
    elif "Piece1" in input_string:
        return ["Piece1"]
    elif "Piece2" in input_string:
        return ["Piece2"]
    return []


def check_pattern(input_string):
    if "Yes" in input_string:
        return ["Yes"]
    elif "No" in input_string:
        return ["No"]
    elif "Piece" in input_string:
        if "I" in input_string:
            return ["Piece", "I"]
        elif "S" in input_string:
            return ["Piece", "S"]
    return []

def extract_nextQuestion(input_string):
    # Use regex to extract the sentence between "Remain:" and "Seq:"
    match = re.search(r'Remain:\s*(.*?)\s*Seq:', input_string, re.DOTALL)
    
    # Check if there is a match
    if match:
        middle_sentence = match.group(1).strip()
        return middle_sentence
    else:
        return "No match found."

def get_recTool(questions, tools, tool_dictionary):
    length = len(questions)
    tool_for_eachQuery = []
    for x in range(0, length):
        temp = []
        flag = "None"
        remain_question = questions[x]
        for y in range(len(tools[x])):
            tool_entire = f"{tools[x][y]}: {tool_dictionary[tools[x][y]]}"
            pattern = answer_AI(remain_question, tool_entire)
            pattern_list = check_pattern(pattern)
            flag = pattern_list[0]
            print(pattern_list)
            if y == len(tools[x]) - 1:
                tool_for_eachQuery.append(temp)
            elif flag == "Yes":
                temp.append(tools[x][y])
                tool_for_eachQuery.append(temp)
                break
            elif flag == "No":
                continue
            elif flag == "Piece":
                temp.append(tools[x][y])
                temp.append(pattern_list[1])
                remain_question = extract_nextQuestion(pattern)
    return tool_for_eachQuery


def extract_toolDescription(tool_list, tool_dictionary):
    entire_toolDes = []
    for x in tool_list:
        temp = f"{x}: {tool_dictionary[x]}"
        entire_toolDes.append(temp)
    return entire_toolDes

def extract_tools(input_string):
    # Use regular expression to find the second position (after "Piece. ")
    match = re.search(r'Piece\.\s([\w, ]+)\.', input_string)
    
    if match:
        # Extract the tools and return them as a list
        tools = match.group(1).split(', ')
        return tools
    else:
        return []

def get_recToolBundle(questions, tools, tool_dictionary):
    length = len(questions)
    tool_for_eachQuery = []
    for x in range(0, length):
        temp = []
        flag = "None"
        query_tool_temp = []
        for y in range(len(tools[x])):
            #query_tool_temp = temp
            temp.append(tools[x][y])
            temp_toolDescription = extract_toolDescription(temp, tool_dictionary)
            
            pattern = answer_AI_bundle(questions[x], temp_toolDescription)
            print(pattern)
            pattern_list = check_pattern(pattern)
            flag = pattern_list[0]
            print(pattern_list)
            if y == len(tools[x]) - 1:
                tool_for_eachQuery.append(temp)
            elif flag == "Yes":
                tool_for_eachQuery.append(temp)
                print(temp)
                break
            elif flag == "No":
                temp.pop()
                print(temp)
                continue
            elif flag == "Piece1":
                print(temp)
                continue
            elif flag == "Piece2":
                temp.pop()
                print(temp)
    return tool_for_eachQuery

"""

def retrieve_toolBundle(query, toolBundle_list):
    """Return the tool-bundle sentence most similar to ``query``.

    A fresh FAISS index over ``toolBundle_list`` is built on every call using
    the default HuggingFace embedding model, and the single nearest entry's
    text is returned.
    """
    bundle_index = FAISS.from_texts(
        texts=toolBundle_list,
        embedding=HuggingFaceEmbeddings(),
    )
    nearest = bundle_index.similarity_search(query, 1)
    return nearest[0].page_content

# Script body: these statements run whenever the module is executed or
# imported (there is no __main__ guard).
# Split the ground-truth records into test/train parts and load the tool descriptions.
queries_test, tools_test, queries_train, tools_train, query_tool_dict_train = testAndtrain("./multi_tool_query_golden.json")
tool_list, tool_dict = toolDescription("./big_tool_des.json")
# NOTE(review): this rebinds queries_test from the list returned above to a
# single query string; the disabled `queries_test[:10]` line below would then
# take the first 10 characters of that string. Confirm which is intended.
queries_test = "Please provide me with the current stock price of Apple and any recent news related to the company."
# One "tool: description, ..." sentence per distinct tool combination in the ground-truth file.
toolBundleList = getToolCombinations("multi_tool_query_golden.json", tool_dict)
# Print the bundle sentence most similar to the query.
print(retrieve_toolBundle(queries_test, toolBundleList))

# Disabled candidate-ranking experiment. get_recTool only exists inside the
# disabled helper region earlier in this file, so that region has to be
# re-enabled before these lines can run.
#queries_test = queries_test[:10]
#tool_Candidate = find_toolCandidate(queries_train, tool_list, queries_test, query_tool_dict_train)
#print(tool_Candidate)
#toolRec = get_recTool(queries_test, tool_Candidate, tool_dict)
#print(toolRec)

# Disabled (kept as a string literal): would write the first 10 expected tool
# lists and the recommendations to listsAno.txt; it needs toolRec from the
# disabled lines above.
"""
with open('listsAno.txt', 'w') as file:
    # Write the first list to the file
    file.write(str(tools_test[:10]) + '\n')

    # Write the second list to the file
    file.write(str(toolRec) + '\n')
"""
