from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel,Field
from typing import Optional, List, Union, Dict, Any
import json
import os
import time
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_core.output_parsers import PydanticOutputParser
import re

# Schema for a single input file picked for the current workflow step.
# Used as the element type of SelectTask.selected_input_files.
# NOTE: documented with `#` comments on purpose. pydantic copies a model's
# class docstring into its JSON schema "description", and that schema is what
# PydanticOutputParser turns into the format instructions sent to the LLM.
class Selected_input_files(BaseModel):
    # Name of the chosen file, taken from the "Available input files" list.
    file_name: str = Field(
        description="The name of the file actually selected for this step in Available input files.",
    )
    # File format label for the chosen file.
    file_format: str = Field(
        description="The format of the file (e.g., FASTQ, BAM, VCF).",
    )

# Structured answer expected from the LLM: one selected tool plus the input
# files it consumes and the outputs it is expected to produce.
# NOTE: documented with `#` comments on purpose. pydantic copies a model's
# class docstring into its JSON schema "description", and that schema is what
# PydanticOutputParser turns into the format instructions sent to the LLM.
class SelectTask(BaseModel):
    # Name of the tool that was picked.
    toolname: str = Field(
        description="The name of the selected tool.",
    )
    # Free-text explanation of what the tool does in this workflow.
    description: str = Field(
        description="An explanation of the tool’s role in the workflow,including Function,What the tool does and an example.(e.g. STAR:RNA-Seq read alignment,Maps sequenced fragments to the genome,Read aligns to exon1–exon2)",
    )
    # Whether the tool came from the reference list supplied in the prompt.
    used_reference_tool: bool = Field(
        description="true if the tool was chosen from the reference list, false if it is a new tool.",
    )
    # Reference-list ID of the tool; -1 marks a tool outside the list.
    toolid: int = Field(
        description="The ID of the tool if it comes from the reference list; use -1 if not.",
    )
    # Minimal, non-empty subset of the available files used by this step.
    selected_input_files: List[Selected_input_files] = Field(
        description="Only include the minimal subset of input files required for this step.but you must choose at least one file.",
    )
    # Format(s) of the file(s) the tool is expected to produce.
    expected_outputs_info: str = Field(
        description="The format of the file(s) this tool produces (e.g., BAM, VCF, FASTA).",
    )



# System-prompt template for tool selection. SelectAgent wraps it in a
# ChatPromptTemplate as the single "system" message.
#
# Template variables (all supplied by SelectAgent.select):
#   {workflow_task}, {used_tools_list}, {available_input_files},
#   {tool_description}, {reference_tools_list}, {format_instructions}
# {available_input_files} is interpolated twice: once under "Context" and
# once inside Rule 3.
# The doubled braces "{{" / "}}" in the reference-tool schema comment are
# escapes that render as literal "{" / "}" instead of template variables.
#
# NOTE(review): Rule 8 asks for "file name, format, and source tool", but
# Selected_input_files only declares file_name and file_format, so there is
# no schema field for a source tool.
# NOTE(review): Rules 4 and 7 allow the answer "Unknown", but SelectTask
# declares used_reference_tool as bool and toolid as int; "Unknown" in those
# fields would presumably fail parsing -- confirm the intended behaviour.
prompt = '''
You are a bioinformatics expert.
Your task is to select ONE suitable bioinformatics tool based on the workflow task, already used tools, and the available input files.
You may choose from the reference tools list or propose a different tool.

Context:
- Workflow task: {workflow_task}
- Already used tools in the workflow: {used_tools_list}
- Available input files: {available_input_files}
  (Important: This is a superset of candidate files from previous tools. The current tool should select ONLY a minimal subset needed as its inputs, NOT all files.)
- Tool description: {tool_description}

Reference tools (JSON array of objects):
{reference_tools_list}
# Each object:
# {{
#   "toolid": "int",
#   "toolname": "string",
#   "description": "string"
# }}

Rules:
1. Select exactly ONE tool.
2. You MAY choose a tool outside the reference list if it is more suitable.
3. Input selection: From "Available input files"{available_input_files}, choose ONLY the minimal subset required by the selected tool. Do NOT assume all files are used. But You must choose at least one file.
4. I/O compatibility: The selected tool must accept the chosen files by format/content. If uncertain, use "Unknown".
5. Workflow consistency: The tool should logically follow the already used tools and their outputs.
6. Expected outputs: Provide a clear description of outputs after running this tool within the workflow (field "expected_outputs_info").
7. If any required field cannot be determined, use "Unknown".
8. Output MUST be a single JSON object with EXACTLY the fields defined below (no extra text). It MUST include the field "selected_input_files" listing the chosen files (e.g., file name, format, and source tool).

Output (fill exactly these fields, no others):
{format_instructions}
'''


class SelectAgent:
    """LLM-backed selector that picks one tool for a workflow step.

    The module-level ``prompt`` template is piped into a ``ChatOpenAI`` model
    and the reply is parsed into a ``SelectTask`` by ``PydanticOutputParser``.
    """

    def __init__(
        self,
        temperature: float = 0.5,
        *,
        base_url: str = "",
        api_key: str = "",
        model: str = "",
    ):
        """Build the output parser, the prompt template and the chat model.

        Args:
            temperature: Sampling temperature forwarded to ``ChatOpenAI``.
            base_url: Endpoint URL forwarded to ``ChatOpenAI``.
            api_key: API key forwarded to ``ChatOpenAI``.
            model: Model name forwarded to ``ChatOpenAI``.

        The three connection settings are keyword-only and default to the
        empty strings that used to be hard-coded here, so ``SelectAgent()``
        and ``SelectAgent(0.2)`` construct the client exactly as before.
        """
        self.parser = PydanticOutputParser(pydantic_object=SelectTask)
        self.prompt = ChatPromptTemplate.from_messages([("system", prompt)])
        # ChatOpenAI is already imported at module level; no local re-import.
        self.llm = ChatOpenAI(
            base_url=base_url,
            api_key=api_key,
            model=model,
            temperature=temperature,
        )

    def select(
        self,
        workflow_task: Any,
        used_tools_list: Any,
        available_input_files: Any,
        tool_description: Any,
        reference_tools_list: Any,
    ) -> "Optional[SelectTask]":
        """Run the prompt -> model -> parser chain for one workflow step.

        Every argument is interpolated into the prompt placeholder of the
        same name; the parser's format instructions are added automatically.

        Returns:
            The parsed ``SelectTask``, or ``None`` when any stage of the
            chain raises (the exception is printed, not propagated).
        """
        chain = self.prompt | self.llm | self.parser
        payload = {
            "workflow_task": workflow_task,
            "used_tools_list": used_tools_list,
            "available_input_files": available_input_files,
            "tool_description": tool_description,
            "reference_tools_list": reference_tools_list,
            "format_instructions": self.parser.get_format_instructions(),
        }
        try:
            return chain.invoke(payload)
        except Exception as e:
            # Best-effort boundary: request, model and parsing failures are
            # all reported the same way and callers check for None.
            print(e)
            return None

def select_tool(workflow_task, used_tools_list, available_input_files, tool_description, reference_tools_list):
    """Select a tool for one workflow step and return it as a plain dict.

    Creates a fresh ``SelectAgent`` with default settings, forwards all
    arguments to ``SelectAgent.select`` and converts the resulting model with
    ``model_dump()``. Returns ``None`` when the agent yields no result.
    """
    selection = SelectAgent().select(
        workflow_task,
        used_tools_list,
        available_input_files,
        tool_description,
        reference_tools_list,
    )
    if not selection:
        return None
    return selection.model_dump()
