import os
import json
import requests
from io import BytesIO
import time
import PyPDF2
from claude_api import call_bedrock

def load_json(file_path):
    """Load and return the JSON document stored at ``file_path``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default text encoding.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

def save_json(data, file_path):
    """Write ``data`` to ``file_path`` as JSON indented by four spaces.

    The payload is serialized *before* the destination is opened: opening in
    "w" mode truncates the file, so serializing first means a non-serializable
    payload raises without destroying a previously saved file.

    Args:
        data: Any JSON-serializable value.
        file_path: Destination path; an existing file is overwritten.

    Raises:
        TypeError: If ``data`` contains values ``json`` cannot serialize.
        OSError: If the destination cannot be opened for writing.
    """
    serialized = json.dumps(data, indent=4)
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(serialized)

def process_json(json_string):
    """Best-effort conversion of a model reply into a parsed JSON value.

    Steps, in order:
      1. If the text contains a "```json" fence, keep only the text between
         that marker and the next "```" (or the end of the text).
      2. If the remaining text both starts and ends with a double quote,
         drop that outer pair of quotes.
      3. Parse the result with ``json.loads``.
      4. If parsing fails and the text still contains a "```" fence (a bare
         fence, or one tagged differently, e.g. "```JSON"), try the body of
         the first such fence, with and without its leading tag line.

    Args:
        json_string: Raw reply text. A non-string value is returned unchanged.

    Returns:
        The parsed JSON value on success. If nothing parses, the parse error
        is printed and the text produced by steps 1-2 is returned, so callers
        must be prepared to receive a plain ``str``.
    """
    if not isinstance(json_string, str):
        # Already-parsed (or missing) values are passed straight through.
        return json_string

    if '```json' in json_string:
        json_string = json_string.split("```json", 1)[1].split("```", 1)[0]
    if json_string.startswith('"') and json_string.endswith('"'):
        json_string = json_string[1:-1]

    try:
        return json.loads(json_string)
    except (ValueError, RecursionError) as e:
        # json.JSONDecodeError is a ValueError subclass.
        primary_error = e

    if "```" in json_string:
        fenced_body = json_string.split("```", 2)[1]
        candidates = [fenced_body]
        first_line, newline, remainder = fenced_body.partition("\n")
        if newline and first_line.strip().isalpha():
            # The first line looks like a language tag; also try without it.
            candidates.append(remainder)
        for candidate in candidates:
            try:
                return json.loads(candidate)
            except (ValueError, RecursionError):
                continue

    print(primary_error)
    return json_string

def read_pdf_content(file_path, timeout=30):
    """Extract the text of every page of a PDF given by local path or URL.

    Args:
        file_path: Local filesystem path, or an ``http://`` / ``https://`` URL
            from which the PDF is downloaded first.
        timeout: Seconds to wait for the HTTP download before giving up.
            Only used for URLs.

    Returns:
        The text of all pages, each page followed by a newline, or ``None``
        if the file could not be fetched, opened or parsed (the error is
        printed). A PDF with no pages yields an empty string.
    """
    try:
        if file_path.startswith(("http://", "https://")):
            # Download the file content from the URL. Fail on HTTP errors
            # instead of handing an error page to the PDF parser.
            response = requests.get(file_path, timeout=timeout)
            response.raise_for_status()
            pdf_file = BytesIO(response.content)
        else:
            # Open local file
            pdf_file = open(file_path, "rb")

        # The context manager closes the stream even when parsing raises.
        with pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # `or ""` guards against a page whose extraction yields nothing,
            # so one empty page does not discard the whole document.
            page_texts = [
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            ]
        return "".join(page_texts)
    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "skip this paper".
        print(f"Error reading PDF {file_path}: {e}")
        return None

def identify_opinions(task_description, paper_content, model_id="your model id"):
    """Ask the model to extract stakeholders and their opinions from one paper.

    Builds a system + user prompt that embeds the evaluation task and the
    paper text, sends it through ``call_bedrock`` and returns the reply text.

    Args:
        task_description: Description of the evaluation task; interpolated
            into the prompt.
        paper_content: Text of the paper to analyze; interpolated into the
            prompt.
        model_id: Model identifier forwarded to ``call_bedrock``. Defaults to
            the placeholder value this function previously hard-coded.

    Returns:
        The ``text`` of the first content item of the reply message, unparsed.
        The prompt asks for a fenced JSON dict keyed by stakeholder name, or
        an empty JSON list when the paper has nothing relevant.
    """
    # NOTE(review): the prompt says both "output nothing" and "output an empty
    # list" for papers without relevant content, so callers should be ready
    # for either an empty reply or `[]`.
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant that analyzes papers to identify stakeholders and their perspectives."},
        {"role": "user", "content": f"""You need to identify or construct a diverse and comprehensive set of stakeholders, their characteristics, and their perspectives or opinions for the following evaluation task:
***{task_description}***

Here is the paper content to analyze:
{paper_content}

**Guidelines**
- For this given paper, read one paragraph at a time. Ignore the related work section and references list.
Step 1 - Identify *ALL* mentioned name entities, excluding the authors and their institutions, as well as non-human entities.
Step 2 - For each name entity (i.e., stakeholder) you identified, generate the descriptive characteristics for this stakeholder. Then extract their perspectives or opinions that are **relevant to the aforementioned evaluation task**. Each entry should be directly derived from the texts with supporting evidence. 

** Important Reminders**
- If in the provided paper, no relevant information is mentioned about the evaluation task, output nothing.
- In generation, prioritize capturing a wide range of stakeholders and their perspectives, including those that might emerge from different roles, backgrounds, and needs.
- The stakeholder's perspectives or opinions should be relevant to the aforementioned evaluation task.
- The final generated stakeholder entry should each clearly include:
     1. The stakeholder name (e.g., role or representative group),
     2. The stakeholder's characteristics,
     3. The stakeholder's perspectives or opinions regarding the aforementioned evaluation task,
     4. The supporting evidence from the provided papers.

**Output Format**
- If the provided paper contains relevant information about the evaluation task, present the output as a structured JSON dict, with each item formatted as an object containing the following fields:
```json
{{
    "stakeholder name": {{
        "characteristics": "use one sentence to describe the stakeholder's characteristics",
        "perspectives": [
            {{
                "perspective": "use a sentence to describe the stakeholder's perspectives or opinions regarding the aforementioned evaluation task",
                "evidence": "supporting evidence from the provided paper"
            }},
            {{
                "perspective": "use a sentence to describe the stakeholder's perspectives or opinions regarding the aforementioned evaluation task",
                "evidence": "supporting evidence from the provided paper"
            }},
            ...
        ]
    }},
  "stakeholder name": {{
        "characteristics": "use one sentence to describe the stakeholder's characteristics",
        "perspectives": [
            {{
                "perspective": "use a sentence to describe the stakeholder's perspectives or opinions regarding the aforementioned evaluation task",
                "evidence": "supporting evidence from the provided paper"
            }},
            {{
                "perspective": "use a sentence to describe the stakeholder's perspectives or opinions regarding the aforementioned evaluation task",
                "evidence": "supporting evidence from the provided paper"
            }},
            ...
        ]
    }},
  ...
}}
```

- Otherwise, output an empty list:
```json
[]
```"""}
    ]
    response = call_bedrock(
        messages=messages,
        model_id=model_id
    )
    # Only the text of the first content item of the reply is returned.
    return response['output']['message']['content'][0]['text']

def merge_same_opinions(task_description, all_identified_opinions, model_id="your model id"):
    """Ask the model to consolidate duplicate stakeholders and perspectives.

    Builds a system + user prompt that embeds the evaluation task and the
    collected stakeholder opinions, sends it through ``call_bedrock`` and
    returns the reply text.

    Args:
        task_description: Description of the evaluation task; interpolated
            into the prompt.
        all_identified_opinions: The stakeholder opinions to merge. The value
            is interpolated into the prompt as-is, so it should already be
            formatted text (e.g. a JSON string).
        model_id: Model identifier forwarded to ``call_bedrock``. Defaults to
            the placeholder value this function previously hard-coded.

    Returns:
        The ``text`` of the first content item of the reply message, unparsed.
        The prompt asks for a single fenced JSON dict keyed by stakeholder name.
    """
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant that merges and consolidates stakeholder perspectives."},
        {"role": "user", "content": f"""You need to merge the same stakeholder perspectives for the following evaluation task:
***{task_description}***

**Stakeholder Opinion List**
{all_identified_opinions}

**Guidelines**
- Read through the provided stakeholder perspective list. For each stakeholder, read both their listed characteristics and all associated perspectives or opinions.
- If multiple stakeholders belong to the same or closely related category (e.g., similar occupation, background, or role), treat them as the same individual or group and merge all perspective entries accordingly.
- When merging multiple entries for the same stakeholder, combine all their listed characteristics into a single, unified description.
- If the stakeholder has identical perspectives across different entries, merge them into one unified perspective.
- As long as there exists notable differences in the stakeholder's perspectives, keep them as separate perspectives.

**Output Format**
Use a dict to include all stakeholders and their perspectives.
Present the output as ONE structured JSON dict, with each stakeholder item formatted as an object containing the following fields:
```json
{{
    "stakeholder name": {{
        "characteristics": "Description of the stakeholder's characteristics",
        "perspectives": [
            {{
                "perspective": "...",
                "evidence": "..."
            }},
            {{
                "perspective": "...",
                "evidence": "..."
            }},
            ...
        ]
    }},
    "stakeholder name": {{
        "characteristics": "Description of the stakeholder's characteristics",
        "perspectives": [
            {{
                "perspective": "...",
                "evidence": "..."
            }},
            {{
                "perspective": "...",
                "evidence": "..."
            }},
            ...
        ]
    }},
    ...
}}  
```"""}
    ]
    response = call_bedrock(
        messages=messages,
        model_id=model_id
    )
    # Only the text of the first content item of the reply is returned.
    return response['output']['message']['content'][0]['text']

def create_persona_from_opinions(task_description, opinions, model_id="your model id"):
    """Ask the model to generate one persona per stakeholder perspective.

    Builds a system + user prompt that embeds the evaluation task and the
    stakeholder perspective list, sends it through ``call_bedrock`` and
    returns the reply text.

    Args:
        task_description: Description of the evaluation task; interpolated
            into the prompt.
        opinions: The stakeholder perspective list. The value is interpolated
            into the prompt as-is, so it should already be formatted the way
            the model is meant to read it.
        model_id: Model identifier forwarded to ``call_bedrock``. Defaults to
            the placeholder value this function previously hard-coded.

    Returns:
        The ``text`` of the first content item of the reply message, unparsed.
        The prompt's example output is a JSON dict mapping each stakeholder
        name to a list of persona objects.
    """
    # NOTE(review): the prompt calls the output a "JSON list" while the
    # example it shows is a dict keyed by stakeholder name.
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant that creates detailed personas based on stakeholder information."},
        {"role": "user", "content": f"""You need to create stakeholder personas for the following evaluation task:
***{task_description}***

IMPORTANT: 
- You must immediately generate the complete JSON output following the structure below.
- Do not ask any questions, do not confirm any details, and do not provide any explanation. 
- Begin your response directly with the JSON object. 
- Do not use any natural language before or after the output.

**Guidelines**
- For EACH mentioned perspective of the stakeholder, generate a distinct persona that embodies the corresponding perspective.
- For EACH generated persona, you must include the following attributes following these steps:
1. Based on the name and characteristic of this stakeholder type, as well as the corresponding perspective and evidence, generate the persona's demographic information. The generated demographic information should include name, age, education, career, and personality. The generated demographic information should be diverse and realistic.
2. Based on the exact perspective, rephrase the perspective to be suitable for this persona.
3. Based on the stakeholder type and the generated demographic information, generate a specialty for this persona. The specialty could be this persona's skill, expertise, or proficiency in real-life related to the aforementioned task.
4. Based on the stakeholder type and the generated demographic information, generate the persona's psychological traits, such as the characteristics related to personality, emotions, interests, and cognitive tendencies.
5. Based on the stakeholder type and the generated demographic information, generate the social relationships for this persona. The social relationships could be the nature and dynamics of interactions with others, including roles, connections, and communication styles, such as parenting styles, interactions with players. The generated relationship must be within the provided stakeholder types.

**Important Reminders**:
- The generated personas should be diverse, realistic, and grounded in the provided characteristics, perspectives and evidence.
- *Do not omit any perspective*. You must create a corresponding persona for each perspective.
- You should not merge the perspectives of different stakeholders into one persona. For each stakeholder, the total number of personas should be equal to the total number of perspectives. For instance, if one stakeholder has six perspectives, you should create six personas under this stakeholder.

**Stakeholder Perspective List**
{opinions}

IMPORTANT: You must process **ALL** stakeholders in the list, not just the first few.

**Output Format**
Directly output a structured JSON list including personas for *ALL* stakeholders in the provided list. Each key corresponds to a stakeholder name (as provided), and the value is a list of persona objects.
```json
{{
  "Stakeholder Name (use the exact name from the provided stakeholder information list)": [
    {{
      "Name": "Full name of the persona",
      "Demographic Information": "One to two sentences describing the persona's demographic profile.",
      "Perspective": "One to two sentences outlining the persona's perspective.",
      "Specialty": "One to two sentences describing the persona's skill, expertise, or proficiency in certain fields.",
      "Psychological Traits": "One to two sentences describing the persona's characteristics related to personality, emotions, interests, and cognitive tendencies.",
      "Social Relationships": "One to two sentences describing the persona's connection or interaction with others (the related people should be within the provided stakeholder types)."
    }},
    {{
      "Name": "Another persona name",
      ...
    }},
    {{
      "Name": "Another persona name",
      ...
    }},
    ... (Do not omit any persona. The number of personas should be equal to the total number of perspectives)
  ],
  "Stakeholder Name (use the exact name from the provided stakeholder information list)": [
    {{
      "Name": "Full name of the persona",
      "Demographic Information": "One to two sentences describing the persona's demographic profile.",
      "Perspective": "One to two sentences outlining the persona's perspective.",
      "Specialty": "One to two sentences describing the persona's skill, expertise, or proficiency in certain fields.",
      "Psychological Traits": "One to two sentences describing the persona's characteristics related to personality, emotions, interests, and cognitive tendencies.",
      "Social Relationships": "One to two sentences describing the persona's connection or interaction with others (the related people should be within the provided stakeholder types)."
    }},
    {{
      "Name": "Another persona name",
      ...
    }},
    {{
      "Name": "Another persona name",
      ...
    }},
    ... (Do not omit any persona. The number of personas should be equal to the total number of perspectives)
  ],
  "Stakeholder Name": [
    ... (Do not omit any persona. The number of personas should be equal to the total number of perspectives)
  ],
  ... (Include all stakeholders personas)
}}
```

Final Notes:
- Do not skip any stakeholder group.
- Every stakeholder must appear as a top-level key.
- Each persona in the list must match exactly one unique perspective.
- You must directly output a complete JSON dictionary with ALL stakeholder personas included. No question, placeholder or “more would follow” comments allowed.
"""}
    ]
    response = call_bedrock(
        messages=messages,
        model_id=model_id
    )
    # Only the text of the first content item of the reply is returned.
    return response['output']['message']['content'][0]['text']

def _merge_stakeholder_opinions(all_opinions, paper_opinions):
    """Fold one paper's stakeholder entries into ``all_opinions`` in place."""
    for stakeholder_name, stakeholder_info in paper_opinions.items():
        if not isinstance(stakeholder_info, dict):
            # Malformed entry (the model did not follow the schema); skip it.
            continue
        perspectives = stakeholder_info.get("perspectives") or []
        characteristics = stakeholder_info.get("characteristics") or ""
        existing = all_opinions.get(stakeholder_name)
        if existing is None:
            # Copy so later merges never mutate the per-paper data.
            merged_entry = dict(stakeholder_info)
            merged_entry["perspectives"] = list(perspectives)
            merged_entry["characteristics"] = characteristics
            all_opinions[stakeholder_name] = merged_entry
        else:
            existing["perspectives"].extend(perspectives)
            if characteristics:
                # Separate descriptions with a space so sentences do not run together.
                existing["characteristics"] = (
                    f"{existing['characteristics']} {characteristics}".strip()
                )


def create_personas_from_paper(task_description, document_path):
    """Run the full paper -> opinions -> merged opinions -> personas pipeline.

    Every stage is cached under ``./ClaudeOpinions2Personas`` and reused when
    its file already exists:

    * ``IdentifiedOpinions/<paper>.json`` - opinions extracted from each PDF
      found directly inside ``document_path``;
    * ``MergedIdentifiedOpinions.json`` - the consolidated opinions;
    * ``CreatedPersonas.json`` - the final personas.

    A model reply that cannot be parsed as JSON is reported and not cached,
    so the stage is retried on the next run.

    Args:
        task_description: Description of the evaluation task passed to every
            model prompt.
        document_path: Directory containing the ``.pdf`` papers to analyze.

    Returns:
        The personas: the cached or freshly parsed JSON value, or the raw
        reply text if the model's reply could not be parsed.
    """
    output_dir = "./ClaudeOpinions2Personas"
    opinions_dir = f"{output_dir}/IdentifiedOpinions"
    merged_path = f"{output_dir}/MergedIdentifiedOpinions.json"
    personas_path = f"{output_dir}/CreatedPersonas.json"

    # Create directory if not exists
    os.makedirs(opinions_dir, exist_ok=True)

    all_identified_opinions = {}
    # Sorted so the merge order (and therefore the prompt) is deterministic.
    for file_name in sorted(os.listdir(document_path)):
        if not file_name.endswith(".pdf"):
            continue
        # splitext keeps dots inside the name, so "a.v1.pdf" and "a.v2.pdf"
        # no longer share one cache file.
        paper_name = os.path.splitext(file_name)[0]
        cache_path = f"{opinions_dir}/{paper_name}.json"

        if os.path.exists(cache_path):
            identified_opinions = load_json(cache_path)
        else:
            paper_content = read_pdf_content(os.path.join(document_path, file_name))
            if not paper_content:
                continue
            identified_opinions = process_json(
                identify_opinions(task_description, paper_content)
            )
            if isinstance(identified_opinions, str):
                print(f"Skipping {file_name}: model reply could not be parsed as JSON.")
                continue
            save_json(identified_opinions, cache_path)

        # The prompt allows an empty list for papers with nothing relevant,
        # so only dict results carry stakeholders to merge.
        if isinstance(identified_opinions, dict):
            _merge_stakeholder_opinions(all_identified_opinions, identified_opinions)

    all_identified_opinions = json.dumps(all_identified_opinions, indent=4)
    print(all_identified_opinions)

    # Merge same opinions
    if os.path.exists(merged_path):
        merged_identified_opinions = load_json(merged_path)
    else:
        merged_identified_opinions = merge_same_opinions(task_description, all_identified_opinions)
        print(merged_identified_opinions)
        merged_identified_opinions = process_json(merged_identified_opinions)
        if isinstance(merged_identified_opinions, str):
            print("Merged opinions could not be parsed as JSON; not caching them.")
        else:
            save_json(merged_identified_opinions, merged_path)

    # Create personas
    if os.path.exists(personas_path):
        return load_json(personas_path)

    # Give the model JSON text (as the merge step gets), not a Python dict repr.
    if isinstance(merged_identified_opinions, str):
        opinions_for_prompt = merged_identified_opinions
    else:
        opinions_for_prompt = json.dumps(merged_identified_opinions, indent=4)
    personas = process_json(
        create_persona_from_opinions(task_description, opinions_for_prompt)
    )
    if isinstance(personas, str):
        print("Personas could not be parsed as JSON; not caching them.")
    else:
        save_json(personas, personas_path)
    return personas

def main():
    """Build personas for the storybook question-answer evaluation task and print them."""
    papers_dir = "./ChildEduPapers"
    evaluation_task = (
        "Evaluate the quality of AI-generated question-answer pairs from the storybook content. "
        "These AI-generated question-answer pairs are designed for the interactive storybook reading activity between parents and children aged 3 to 6, and should be grammatically correct and fluent in English. "
        "Parents expect to ask questions that are grounded in the storybook content, but introduce real-world common knowledge beyond the story content."
    )
    print(create_personas_from_paper(evaluation_task, papers_dir))


# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
