from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel,Field
from typing import Optional, List, Union, Dict, Any
import json
import os
import time
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_core.output_parsers import PydanticOutputParser
import re

# Structured description of a single proposed tool; SolutionAgent uses this
# model as the target type of its PydanticOutputParser.
# NOTE: documented with `#` comments rather than a class docstring on purpose:
# pydantic copies a model's docstring into its JSON schema, which would
# presumably alter the format instructions generated from this model.
class SolutionTask(BaseModel):
    # Name of the proposed tool.
    toolname: str = Field(description="the name of the tool")
    # The problem the tool addresses within the workflow.
    function: str = Field(description="What problem the tool solves in the workflow")
    # Longer free-text description of the tool.
    description: str = Field(description="the detailed description of the tool")
    # Data format the tool consumes.
    inputformat: str = Field(description="the input data format of the tool")
    # Data format the tool produces.
    outputformat: str = Field(description="the output data format of the tool")

# Template text that SolutionAgent wraps as the single system message of its
# ChatPromptTemplate. The brace placeholders are filled in by
# SolutionAgent.solution(): user_requirement, input_file_info,
# output_file_requirement, used_tools_list, output_file_info and
# format_instructions (the latter comes from the output parser).
# The literal is sent to the model at runtime, so its text is kept verbatim.
prompt = '''
You are a bioinformatics expert.
Your task is to classify the NEXT bioinformatics tool (to be used in a workflow) based on the user's requirement, available input file(s), expected final output file (including its format and content), already used tools, and current available output file(s).

The user’s requirement is: {user_requirement}.
The user's input file(s) are: {input_file_info}.
The expected final output file (including its format and content) is: {output_file_requirement}.
The workflow has already used the following tools: {used_tools_list}.
Currently, the workflow has produced the following output file(s): {output_file_info}.

Based on this context, you must propose and describe exactly ONE next tool, unless the workflow has already fully satisfied the user's final output requirement. 
The tool you propose must be consistent with the provided context and logically follow the workflow towards producing the required output file format/content.  

When describing the tool, include:
- The specific problem or gap it solves in the workflow  
- A detailed explanation, such as the key algorithm(s) or method(s) it uses, typical bioinformatics applications, strengths, and possible limitations  
- Its input and output data formats, with explicit mapping to the user's output requirement  

Ensure your description is rich and comprehensive enough to support retrieval-augmented generation.

Output JSON format:
{format_instructions}
'''



class SolutionAgent:
    """Ask a chat model to propose the next tool of a workflow.

    The agent renders the module-level ``prompt`` template as a system
    message, sends it to a ``ChatOpenAI`` model and parses the reply into a
    ``SolutionTask`` with a ``PydanticOutputParser``.
    """

    def __init__(
        self,
        temperature: float = 0.5,
        *,
        base_url: str = "",
        api_key: str = "",
        model: str = "",
    ):
        """Create the output parser, the prompt template and the chat model.

        Args:
            temperature: Sampling temperature forwarded to ``ChatOpenAI``.
            base_url: Endpoint URL forwarded to ``ChatOpenAI``.
            api_key: API key forwarded to ``ChatOpenAI``.
            model: Model name forwarded to ``ChatOpenAI``.

        NOTE(review): the connection settings default to empty strings, which
        are the values that used to be hard-coded here; presumably real
        values have to be supplied for requests to succeed -- confirm.
        """
        self.parser = PydanticOutputParser(pydantic_object=SolutionTask)
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", prompt)
        ])

        # ChatOpenAI is already imported at module level; no local re-import.
        self.llm = ChatOpenAI(
            base_url=base_url,
            api_key=api_key,
            model=model,
            temperature=temperature,
        )

    def solution(
        self,
        user_requirement: Any,
        input_file_info: Any,
        output_file_requirement: Any,
        used_tools_list: Any,
        output_file_info: Any,
    ) -> Optional["SolutionTask"]:
        """Run prompt -> model -> parser once and return the parsed result.

        Each argument is substituted into the prompt placeholder of the same
        name; ``format_instructions`` is taken from the output parser.

        Returns:
            Whatever the chain produces (the parser's output), or ``None``
            if anything raised while preparing or invoking the chain; in
            that case the exception is printed.
        """
        # Built per call so that replacing self.prompt / self.llm /
        # self.parser on an existing instance takes effect immediately.
        chain = self.prompt | self.llm | self.parser
        try:
            return chain.invoke({
                "user_requirement": user_requirement,
                "input_file_info": input_file_info,
                "output_file_requirement": output_file_requirement,
                "used_tools_list": used_tools_list,
                "output_file_info": output_file_info,
                "format_instructions": self.parser.get_format_instructions(),
            })
        except Exception as e:
            # Best-effort boundary: report the failure and signal it with
            # None instead of propagating to the caller.
            print(e)
            return None

def workflow_solution(
    user_requirement,
    input_file_info,
    output_file_requirement,
    used_tools_list,
    output_file_info,
):
    """Propose the next workflow tool and return it as a plain dict.

    A fresh ``SolutionAgent`` (default settings) is created for every call
    and all arguments are forwarded unchanged to its ``solution`` method.

    Returns:
        The ``model_dump()`` of the agent's result, or ``None`` when the
        agent returned a falsy result (e.g. ``None`` after a failure).
    """
    proposal = SolutionAgent().solution(
        user_requirement,
        input_file_info,
        output_file_requirement,
        used_tools_list,
        output_file_info,
    )
    return proposal.model_dump() if proposal else None

    