import os
import json
from langchain_core.callbacks import file
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel,Field
from typing import Optional, List, Union, Dict, Any
import json
import os
from langchain_core.output_parsers import PydanticOutputParser
import tqdm
import xml.etree.ElementTree as ET
import re
import os


def extract_xml_help(xml_path):
    """Return the text inside the ``<help>`` element of an XML file.

    The file is read as UTF-8 text and searched with a regular expression
    instead of an XML parser, so the lookup also works on files that are not
    well-formed XML. CDATA markers are removed first so the returned text does
    not include the ``<![CDATA[`` / ``]]>`` wrapper.

    Args:
        xml_path: Path of the XML file to read.

    Returns:
        The whitespace-stripped contents of the first ``<help>...</help>``
        element (the opening tag may carry attributes, e.g.
        ``<help format="markdown">``), or ``''`` when the file cannot be read
        or contains no such element.
    """
    try:
        with open(xml_path, 'r', encoding='utf-8') as f:
            xml_content = f.read()
    except (OSError, ValueError, TypeError):
        # Deliberately best-effort: a missing/unreadable file (OSError), a
        # file that is not valid UTF-8 or a path with an embedded NUL
        # (ValueError), or a non-path argument (TypeError) all mean
        # "no help text available" rather than a fatal error.
        return ''

    # Drop the CDATA wrapper markers wherever they occur in the document.
    xml_content = xml_content.replace('<![CDATA[', '').replace(']]>', '')

    # `(?:\s[^>]*)?` accepts optional attributes on the opening tag while
    # still rejecting longer tag names such as <helper>.
    match = re.search(r'<help(?:\s[^>]*)?>(.*?)</help>', xml_content, re.DOTALL)
    return match.group(1).strip() if match else ''

def gain_filedata(file_os_path):
    """Collect descriptive text from the files directly inside a directory.

    Entries are visited in sorted name order so the result is deterministic
    (``os.listdir`` itself returns entries in arbitrary order). Per entry:

    * ``*.yml`` - the whole file content is appended.
    * ``*.xml`` - the ``<help>`` text returned by ``extract_xml_help`` is
      appended.
    * ``*.md``  - only the first 200 characters are appended.

    Any other entry is ignored. Text files are read as UTF-8, matching the
    encoding used by ``extract_xml_help``.

    Args:
        file_os_path: Path of the directory to scan (not recursive).

    Returns:
        The concatenation of all collected pieces, ``""`` if nothing matched.

    Raises:
        OSError: If the directory cannot be listed or a ``.yml`` / ``.md``
            entry cannot be opened.
        UnicodeDecodeError: If a ``.yml`` / ``.md`` file is not valid UTF-8.
    """
    pieces = []
    for filedata in sorted(os.listdir(file_os_path)):
        entry_path = os.path.join(file_os_path, filedata)
        if filedata.endswith(".yml"):
            with open(entry_path, "r", encoding="utf-8") as f:
                pieces.append(f.read())
        elif filedata.endswith(".xml"):
            pieces.append(extract_xml_help(entry_path))
        elif filedata.endswith(".md"):
            with open(entry_path, "r", encoding="utf-8") as f:
                # In text mode read(200) returns at most 200 characters, so
                # the rest of a long file is never loaded.
                pieces.append(f.read(200))
    return "".join(pieces)

    

# Structured output schema for one tool: five short descriptions, each written
# from a different perspective (the perspectives mirror the numbered list in
# the `prompt` template). ToolSummarize passes this class to
# PydanticOutputParser as `pydantic_object`, and the parsed result is later
# serialised with `.dict()`, so the field names become the JSON keys.
#
# NOTE(review): documented with comments rather than a class docstring on
# purpose - pydantic would presumably include a docstring in the generated
# JSON schema, which would change the format instructions sent to the model.
class tool_description(BaseModel):
    # Perspective 1: what the tool fundamentally does.
    description1: str = Field(description="Main function / core capability of the tool")
    # Perspective 2: where the tool is typically applied.
    description2: str = Field(description="Typical applications / use cases of the tool")
    # Perspective 3: strengths, performance characteristics, or limitations.
    description3: str = Field(description="Advantages, performance, or limitations of the tool")
    # Perspective 4: who uses the tool and why.
    description4: str = Field(description="Target users (e.g., researchers, clinicians, bioinformaticians) of the tool and the reason")
    # Perspective 5: how the tool fits into workflows or alongside other tools.
    description5: str = Field(description="Integration with workflows or other tools")


# Prompt template used by ToolSummarize as the single "system" message.
# It contains two placeholders that are filled in when the chain is invoked:
#   {tool_description}    - the raw text collected for one tool
#   {format_instructions} - the output-format text produced by the parser
# The five numbered aspects correspond to description1..description5 of the
# `tool_description` model. The text is sent to the model verbatim, so any
# edit to it (including whitespace) changes the request.
prompt = '''
You are a bioinformatics expert.  
I will provide you with a description of a bioinformatics tool:  

{tool_description}  

Your task is to generate **5 short alternative descriptions** of this tool, each from a **different perspective**.  
- Each description should be **1–2 sentences long**.  
- Focus on distinct aspects, such as:  
  1. Main function / core capability  
  2. Typical applications / use cases  
  3. Advantages, performance, or limitations  
  4. Target users (e.g., researchers, clinicians, bioinformaticians) and the reason why they use the tool  
  5. Integration with workflows or other tools
- Avoid repeating the same wording across descriptions.  
- Keep the descriptions **concise, clear, and non-overlapping**.  

Output format:  
{format_instructions}

'''


class ToolSummarize:
    """Generate five-perspective descriptions of a tool with a chat model.

    The instance wires together the module-level ``prompt`` template, a
    ``ChatOpenAI`` client and a ``PydanticOutputParser`` for
    ``tool_description``. The chain is assembled once in ``__init__`` and
    reused for every ``summarize`` call.

    Attributes:
        parser: Parser that turns the model reply into a ``tool_description``.
        prompt: Chat prompt built from the module-level ``prompt`` string.
        llm: The ``ChatOpenAI`` client.
    """

    def __init__(
        self,
        temperature: float = 0.5,
        *,
        base_url: str = "",
        api_key: str = "",
        model: str = "",
    ):
        """Create the prompt, model client, parser and the chain joining them.

        Args:
            temperature: Sampling temperature forwarded to ``ChatOpenAI``.
            base_url: Endpoint forwarded to ``ChatOpenAI``. Defaults to the
                empty string that was previously hard-coded.
            api_key: API key forwarded to ``ChatOpenAI``. Defaults to the
                empty string that was previously hard-coded.
            model: Model name forwarded to ``ChatOpenAI``. Defaults to the
                empty string that was previously hard-coded.
        """
        self.parser = PydanticOutputParser(pydantic_object=tool_description)
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", prompt),
        ])
        self.llm = ChatOpenAI(
            base_url=base_url,
            api_key=api_key,
            model=model,
            temperature=temperature,
        )
        # Both values are invariant for the lifetime of the instance, so they
        # are computed once here rather than on every summarize() call.
        self._format_instructions = self.parser.get_format_instructions()
        self._chain = self.prompt | self.llm | self.parser

    def summarize(self, tooldescription: str) -> Optional["tool_description"]:
        """Ask the model for five descriptions of the given tool text.

        Args:
            tooldescription: Raw descriptive text of one tool; substituted
                for ``{tool_description}`` in the prompt.

        Returns:
            The parsed ``tool_description`` instance, or ``None`` if invoking
            the chain raised (the exception is printed, not propagated).
        """
        payload = {
            "tool_description": tooldescription,
            "format_instructions": self._format_instructions,
        }
        try:
            return self._chain.invoke(payload)
        except Exception as e:
            # Deliberate best-effort boundary: a failed request or an
            # unparsable reply for one tool must not abort a batch run.
            print(e)
            return None


# Root directories to scan. Each immediate sub-directory of a root is treated
# as one tool whose files are fed to gain_filedata().
filepath_list = [
]

# Output is JSON Lines: one {tool_directory_path: {description1..5}} object
# per line. The context manager guarantees the file is closed even if a tool
# directory raises part-way through the run.
with open("./WorkflowAgent/embedding_test/tool_desdata/tool_des.jsonl", "w", encoding="utf-8") as f2write:
    # Created lazily on the first tool so that a run with nothing to process
    # never constructs a model client, then reused for every tool.
    tool_summarize = None

    for filepath in filepath_list:
        # Sorted so the output order is reproducible between runs.
        for filename in tqdm.tqdm(sorted(os.listdir(filepath))):
            file_os_path = os.path.join(filepath, filename)

            # gain_filedata() lists the path as a directory; stray regular
            # files next to the tool directories would make it raise.
            if not os.path.isdir(file_os_path):
                continue

            if tool_summarize is None:
                tool_summarize = ToolSummarize()

            description = gain_filedata(file_os_path)
            result = tool_summarize.summarize(description)
            if result is None:
                # summarize() already printed the error; skip this tool.
                continue

            f2write.write(json.dumps({file_os_path: result.dict()}) + "\n")
            # Flush per record so finished results survive an interrupted run.
            f2write.flush()


        


        
              
                

        
            
