import os
import json
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel,Field
from typing import Optional, List, Union, Dict, Any
import json
import os
from langchain_core.output_parsers import PydanticOutputParser
import tqdm


# Schema for one persona together with the questions generated for it.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic appears to copy a model docstring into its JSON schema, which would
# change the format instructions derived from these models -- confirm before
# adding one.
class Persona(BaseModel):
    # Role label of the persona (required).
    name: str = Field(..., description="Persona role, e.g. lab researcher, Clinician, Data engineer")
    # Questions written from this persona's point of view (required). The
    # capitalised field name is part of the schema and is also the key emitted
    # by PromptTemplate.to_dict, so it must not be renamed casually.
    Question: List[str] = Field(..., description="List of user questions for this persona")


# Schema describing a workflow's input and output files.
# NOTE(review): nothing in the visible part of this file references this
# model; it may be used elsewhere or be left over -- verify before removing.
class WorkflowIO(BaseModel):
    # Names of the input files the workflow requires (required field).
    inputs: List[str] = Field(..., description="List of required input files with concrete names")
    # Names of the output files the workflow can produce (required field).
    outputs: List[str] = Field(..., description="List of workflow output files that can be generated")


# Top-level schema for the LLM answer: the workflow's input files plus one
# Persona entry (name and questions) per persona.
# NOTE(review): kept as comments rather than a class docstring because pydantic
# appears to copy a model docstring into its JSON schema, which would change
# the format instructions built from this model -- confirm before adding one.
class PromptTemplate(BaseModel):
    inputs: List[str] = Field(..., description="List of required input files with concrete names of the workflow")
    persona_list: List[Persona] = Field(..., description="List of personas's question with distinct perspectives on the workflow")

    def to_dict(self) -> dict:
        """Return the model as plain dicts and lists.

        The result has two keys, in this order: ``"persona_list"``, a list of
        ``{"name": ..., "Question": ...}`` dicts (one per persona), and
        ``"inputs"``, the list of input file names.
        """
        personas = []
        for persona in self.persona_list:
            personas.append({"name": persona.name, "Question": persona.Question})
        return {"persona_list": personas, "inputs": self.inputs}


# System-message template used by WorkflowSummarize. It asks the model for
# three questions per persona and ends with the `{format_instructions}`
# placeholder, which WorkflowSummarize.summarize fills with the output
# parser's format instructions at invocation time. The text is sent to the
# model verbatim (including its trailing-space line endings), so edit it with
# care.
prompt = '''
Please generate exactly **3 user questions** for each persona in the list below.  

- The output must consist only of user questions, not answers or explanations.  
- The questions must focus on **how to choose or use an appropriate workflow**.  
- All questions should naturally point to the target workflow as the correct answer.  
- Each persona should have a distinct perspective (e.g., cost, speed, accuracy, compliance, reproducibility, visualization).  
- Do **not** contradict the workflow’s input, output, or tasks.  
- Vary **style** (formal, casual, search-query style).  
- Vary **length** (short ~10 words, long ~40 words).  
- Do not expose the workflow name or implementation details.  

Persona list: lab researcher, Clinician, Data engineer.  

Output format must be strictly JSON format: 
{format_instructions}
'''

    

class WorkflowSummarize:
    """Generate persona-specific user questions for a workflow description.

    Combines a chat prompt (the module-level ``prompt`` as system message plus
    a human message carrying the workflow description), a ``ChatOpenAI`` model
    and a ``PydanticOutputParser`` targeting ``PromptTemplate``.
    """

    def __init__(
        self,
        temperature: float = 0.5,
        *,
        base_url: str = "",
        api_key: str = "",
        model: str = "",
    ):
        """Build the prompt, output parsers and chat model.

        Args:
            temperature: Sampling temperature forwarded to ``ChatOpenAI``.
            base_url: Endpoint URL forwarded to ``ChatOpenAI``.
            api_key: API key forwarded to ``ChatOpenAI``.
            model: Model name forwarded to ``ChatOpenAI``.

        The three connection settings are keyword-only and default to the
        empty strings that were previously hard-coded, so existing callers
        behave exactly as before while new callers can target a real endpoint
        without editing this file.
        """
        self.parser = PydanticOutputParser(pydantic_object=PromptTemplate)
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", prompt),
            ("human", "You are given the following **target workflow** description:{workflow_info}. ")
        ])

        # Second parser for the same model; not used by summarize() but kept
        # because external code may read this attribute.
        self.question_parser = PydanticOutputParser(pydantic_object=PromptTemplate)
        self.llm = ChatOpenAI(
            base_url=base_url,
            api_key=api_key,
            model=model,
            temperature=temperature,
        )

    def summarize(self, workflow_info: str):
        """Run the prompt -> model -> parser chain on one workflow description.

        Args:
            workflow_info: Text description of the target workflow; it is
                substituted into the human message.

        Returns:
            Whatever the parser produces for the model's answer, or ``None``
            when invoking the chain raises. The exception is printed and
            swallowed so that a batch caller can continue with the next item.
        """
        # The chain is assembled per call from the current attributes, so
        # replacing ``self.llm`` or ``self.parser`` after construction works.
        chain = self.prompt | self.llm | self.parser
        try:
            return chain.invoke({
                "workflow_info": workflow_info,
                "format_instructions": self.parser.get_format_instructions(),
            })
        except Exception as e:
            print(e)
            return None

# Batch driver: generate persona questions for every workflow-metadata file
# and write one JSON result per input file.
#
# Both directories are relative to the current working directory. Files are
# opened from the same directory that was listed; previously the listing used
# this relative path while the open used a hard-coded absolute home-directory
# path, so the two only agreed for one particular working directory.
INPUT_DIR = "./WorkflowAgent/Summarize_Agent/summarize_wf_metadata"
OUTPUT_DIR = "./WorkflowAgent/Summarize_testDataset/test_LLM_judger"

# Create the output directory up front so a missing directory cannot make the
# first write fail after the model call has already been made.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# One summarizer (and therefore one model client) serves the whole batch.
question_summarize = WorkflowSummarize()

file_list = os.listdir(INPUT_DIR)
for filename in tqdm.tqdm(file_list):
    # Close the input file before the model call instead of holding it open.
    with open(os.path.join(INPUT_DIR, filename), "r", encoding="utf-8") as src:
        workflow_info = json.load(src)

    result = question_summarize.summarize(str(workflow_info))
    if result:
        # NOTE(review): str.replace rewrites every ".ga" occurrence in the
        # name, not only a trailing extension; kept so output names stay the
        # same as before.
        out_name = filename.replace(".ga", ".json")
        with open(os.path.join(OUTPUT_DIR, out_name), "w", encoding="utf-8") as dst:
            # NOTE(review): ``.dict()`` is deprecated under pydantic v2 in
            # favour of ``model_dump()``; left unchanged because the installed
            # pydantic version is not visible from this file.
            json.dump(result.dict(), dst, indent=4)
