import os
import json
import yaml
import sys
import subprocess
from typing import List
from datetime import datetime
from langchain_core.runnables import RunnableConfig
from langchain_core.messages import HumanMessage
from agents.state import OverallState
from agents.utils import get_user_question
from agents.openai.building_blocks import next_sub_question_writer
from agents.openai.advanced_building_blocks import next_sub_question_writer_with_information_gain
from instructor.openai.generate_instructions import *
from orchestrator.openai.orchestrator import Orchestrator
from json_converter.openai.converter import JSONConverter
from yaml_converter.openai.converter import YAMLConverter
from workflow.utils import load_state_dict_to_overall_state
from memory.memory_manager import MemoryManager

# Limit on the round counter that WorkflowManager.run checks on every
# iteration, so a question that is never verified cannot loop forever.
MAX_ROUNDS = 10

class WorkflowManager:
    """Answer a user question through repeated plan/execute rounds.

    Each round (see ``_run_one_step``) generates the next sub-question, asks
    the orchestrator for a workflow expressed as YAML, writes that workflow
    and the current state into an on-disk workspace, executes the generated
    ``run.py`` in a subprocess and merges the resulting state back into
    ``self.state``.
    """

    # Characters that are not allowed in Windows file names.
    _INVALID_PATH_CHARS = '<>:"/\\|?*'
    # Device names Windows refuses as file or directory names.
    _RESERVED_NAMES = frozenset((
        'CON', 'PRN', 'AUX', 'NUL',
        'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
        'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9',
    ))
    # Conservative cap that keeps full workspace paths short on Windows.
    _MAX_NAME_LENGTH = 40

    def __init__(self,
                 first_user_message: str,
                 record_memory: bool = False,
                 base_workspace: str = "workspace",
                 memory_manager: "MemoryManager | None" = None,
                 pre_loaded_memory_paths: "List[str] | None" = None):
        """Create a manager whose state holds ``first_user_message``.

        Args:
            first_user_message: The question; stored as the first message.
            record_memory: When true (and a memory manager is set), every
                successful round is recorded through the memory manager.
            base_workspace: Directory under which per-question/per-round
                workspaces are created.
            memory_manager: Optional store for workflow execution memory.
            pre_loaded_memory_paths: Memory files to load into
                ``memory_manager`` up front.

        Raises:
            ValueError: If memory paths are given without a memory manager.
        """
        self.state = OverallState(messages=[HumanMessage(content=first_user_message)])
        # Empty config: downstream components fall back to their defaults.
        self.config = RunnableConfig()
        self.orchestrator = Orchestrator(self.config)
        self.record_memory = record_memory
        self.base_workspace = base_workspace
        self.memory_manager = memory_manager
        if pre_loaded_memory_paths:
            if self.memory_manager is None:
                raise ValueError(
                    "pre_loaded_memory_paths was given but memory_manager is None"
                )
            for memory_path in pre_loaded_memory_paths:
                self.memory_manager.load_workflow_execution_memory(memory_path)

    @staticmethod
    def _project_root() -> str:
        """Return the absolute path three directory levels above this file."""
        here = os.path.dirname(__file__)
        return os.path.abspath(os.path.dirname(os.path.dirname(here)))

    @staticmethod
    def _safe_question_name(question_name: str) -> str:
        """Turn a free-text question into a short, filesystem-safe name.

        Invalid path characters, whitespace and control characters become
        underscores, runs of underscores are collapsed, and the result is cut
        to at most ``_MAX_NAME_LENGTH`` characters (preferring an underscore
        boundary). Empty results and Windows reserved device names are
        replaced by ``'unknown_question'``.
        """
        import re

        invalid = WorkflowManager._INVALID_PATH_CHARS
        max_length = WorkflowManager._MAX_NAME_LENGTH

        safe_name = ''.join(
            '_' if (c in invalid or c.isspace() or ord(c) < 32) else c
            for c in question_name
        )
        safe_name = re.sub('_+', '_', safe_name).strip('_')

        if len(safe_name) > max_length:
            truncated = safe_name[:max_length]
            last_underscore = truncated.rfind('_')
            # Cut at a word boundary only if that keeps most of the name.
            if last_underscore > max_length * 0.6:
                truncated = truncated[:last_underscore]
            safe_name = truncated.strip('_')

        if not safe_name or safe_name.upper() in WorkflowManager._RESERVED_NAMES:
            return 'unknown_question'
        return safe_name

    @staticmethod
    def _serialize_for_json(obj):
        """``json.dump`` fallback for values the encoder cannot handle itself.

        The encoder only calls this for objects that are not already
        dict/list/tuple/str/number/bool/None, so only those cases are handled:
        pydantic-style models are dumped to dicts, plain objects expose their
        ``__dict__`` and everything else is stringified.
        """
        if hasattr(obj, 'model_dump'):  # pydantic v2 model
            return obj.model_dump()
        if hasattr(obj, 'dict'):  # pydantic v1 model
            return obj.dict()
        if hasattr(obj, '__dict__'):  # regular object
            return obj.__dict__
        return str(obj)

    def _create_workspace(self, graph_yaml: str, round_number: int):
        """Materialise one round's workspace on disk.

        Writes ``before_state.json`` (current state), ``graph.yaml`` (the raw
        workflow), ``graph.py`` (workflow converted by ``YAMLConverter``) and
        ``run.py`` (a script that loads the state, invokes the graph and
        saves ``after_state.json``) into
        ``<base_workspace>/question_<name>/round_<round_number>``.

        Args:
            graph_yaml: Workflow definition as YAML text.
            round_number: Round index used in the directory name.

        Returns:
            The workspace path, or ``None`` if ``graph_yaml`` cannot be parsed.
        """
        safe_question_name = self._safe_question_name(
            get_user_question(self.state['messages'])
        )
        workspace_path = os.path.join(
            self.base_workspace,
            f"question_{safe_question_name}",
            f"round_{round_number}",
        )
        # Recursive: also creates the base workspace when it is missing.
        os.makedirs(workspace_path, exist_ok=True)
        print(f"✅ Folder created at: {workspace_path}")

        # Snapshot of the state the generated run.py will start from.
        before_state_path = os.path.join(workspace_path, "before_state.json")
        with open(before_state_path, "w", encoding="utf-8") as f:
            json.dump(dict(self.state), f, indent=2, default=self._serialize_for_json)
        print(f"✅ State saved to: {before_state_path}")

        # Keep the raw YAML even if it turns out to be unparsable below.
        graph_yaml_path = os.path.join(workspace_path, "graph.yaml")
        with open(graph_yaml_path, "w", encoding="utf-8") as f:
            f.write(graph_yaml)
        print(f"✅ Graph saved to: {graph_yaml_path}")

        # Deliberately broad: any parse failure means "no workspace this round".
        try:
            graph_dict = yaml.safe_load(graph_yaml)
        except Exception as e:
            print(f"❌ Error loading graph yaml: {e}")
            return None

        graph_py_path = os.path.join(workspace_path, "graph.py")
        YAMLConverter().save_to_file(graph_dict, graph_py_path)
        print(f"✅ Workflow Python file saved to: {graph_py_path}")

        # ``!r`` emits a valid string literal whatever the path contains.
        project_root = self._project_root()
        run_script = f'''import json
import sys
import os

# Add project root to Python path for imports
project_root = {project_root!r}
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from agents.state import OverallState
from graph import graph
from workflow.utils import load_state_dict_to_overall_state, save_state

def main():
    # Load initial state
    with open("before_state.json", "r") as f:
        initial_state = load_state_dict_to_overall_state(json.load(f))
    # Run the workflow
    print("Starting workflow execution...")
    result = graph.invoke(initial_state)
    
    # Save final state
    save_state(result, "after_state.json")
    print("Workflow execution completed. Results saved to after_state.json")
    
    return result

if __name__ == "__main__":
    main()
'''

        run_script_path = os.path.join(workspace_path, "run.py")
        # Python reads source files as UTF-8, so write the script as UTF-8.
        with open(run_script_path, "w", encoding="utf-8") as f:
            f.write(run_script)
        print(f"✅ Run script saved to: {run_script_path}")

        return workspace_path

    def _run_one_step(self, round_number: int):
        """Execute one round and merge its result into ``self.state``.

        Steps: generate the next sub-question, archive the previous round's
        search results/references, look up related execution memory (when a
        memory manager is configured) and derive instructions from it, ask
        the orchestrator for a workflow, create the workspace and run its
        ``run.py`` in a subprocess. On success ``after_state.json`` is loaded
        and merged into the state; failures are reported on stdout and leave
        the state as it was before the subprocess ran.

        Args:
            round_number: Round index, used for the workspace directory name.

        Returns:
            ``self.state`` after the round.
        """
        print("📝 Generating next sub-question...")
        next_sub_question = next_sub_question_writer_with_information_gain(self.state, self.config)
        self.state.update(next_sub_question)

        print(f"🔍 Next sub-question: {self.state.get('current_sub_question', 'No sub-question generated')}")

        # Archive the previous round's results before new ones are produced.
        self._move_current_to_history()

        user_question = get_user_question(self.state['messages'])
        sub_question = self.state.get('current_sub_question', '')

        # The memory manager is optional; without one there is nothing to retrieve.
        if self.memory_manager is None:
            workflow_execution_memory = []
        else:
            workflow_execution_memory = self.memory_manager.retrieve_workflow_execution_memory_by_embedding(
                user_question, sub_question
            )
        print(f"🔍 Retrieved {len(workflow_execution_memory)} workflow execution memory entries")

        if len(workflow_execution_memory) > 0:
            instructions = {
                'orchestrator_instructions': generate_orchestrator_instructions(
                    user_question, sub_question, workflow_execution_memory),
                'searcher_instructions': generate_search_instructions(
                    user_question, sub_question, workflow_execution_memory),
                'browser_instructions': generate_browse_instructions(
                    user_question, sub_question, workflow_execution_memory),
            }
            self.state.update({'instruction_state': instructions})

        print("🎯 Orchestrating workflow for next step...")
        workflow_yaml = self.orchestrator.orchestrate_loop(self.state)
        print(f"📊 Generated workflow YAML: {workflow_yaml[:200]}...")

        print("📁 Creating workspace for workflow execution...")
        workspace_path = self._create_workspace(workflow_yaml, round_number)
        if workspace_path is None:
            # _create_workspace already reported why the YAML was rejected.
            print("❌ Workspace creation failed, skipping workflow execution")
            return self.state
        print(f"✅ Workspace created at: {workspace_path}")

        print("🚀 Executing workflow via subprocess...")
        try:
            # Make project packages importable in the child process.
            env = os.environ.copy()
            project_root = self._project_root()
            existing_pythonpath = env.get('PYTHONPATH')
            if existing_pythonpath:
                env['PYTHONPATH'] = f"{project_root}{os.pathsep}{existing_pythonpath}"
            else:
                env['PYTHONPATH'] = project_root

            # cwd is the workspace, so run.py and its relative file names resolve there.
            result = subprocess.run(
                [sys.executable, "run.py"],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # stderr is merged into stdout
                text=True,
                cwd=workspace_path,
                env=env,
            )

            if result.returncode != 0:
                print(f"❌ Subprocess execution failed with return code {result.returncode}")
                # stderr was merged above, so stdout carries the error output too.
                print(f"📋 Subprocess output: {result.stdout}")
                return self.state

            print("✅ Workflow executed successfully via subprocess")
            print(f"📋 Subprocess output: {result.stdout}")

            after_state_path = os.path.join(workspace_path, "after_state.json")
            if not os.path.exists(after_state_path):
                print("⚠️ after_state.json not found, using current state")
                return self.state

            with open(after_state_path, "r") as f:
                result_dict = json.load(f)
            result_state = load_state_dict_to_overall_state(result_dict)

            if self.record_memory and self.memory_manager is not None:
                print("📝 Recording workflow execution memory...")
                # Copy first: self.state is overwritten by the update below.
                before_state = self.state.copy()
                self.memory_manager.update_workflow_execution_memory(before_state, result_state, workflow_yaml)

            print("🔄 Updating internal state...")
            self.state.update(result_state)

        except Exception as subprocess_error:
            # Best effort: a failed round must not abort the whole run.
            print(f"❌ Subprocess execution failed: {str(subprocess_error)}")
            return self.state

        return self.state

    def _move_current_to_history(self):
        """Move current search results and found references to history fields before starting a new step.

        Search results are merged into ``history_search_results`` keyed by
        URL (a newer result replaces an older one with the same URL). Found
        references are merged into ``history_found_references`` keyed by URL,
        with the information lists of duplicates concatenated and
        de-duplicated in order. The current lists are emptied afterwards.
        """
        searcher_state = self.state.get('searcher_state')
        if searcher_state is not None:
            current_search_results = searcher_state.get('search_results', [])
            if current_search_results:
                # ``or []`` tolerates a history field that is present but None.
                history = {
                    result.url: result
                    for result in (searcher_state.get('history_search_results') or [])
                }
                for result in current_search_results:
                    history[result.url] = result

                # Work on a copy so the previous state object is left untouched.
                updated_searcher_state = searcher_state.copy()
                updated_searcher_state['history_search_results'] = list(history.values())
                updated_searcher_state['search_results'] = []
                self.state['searcher_state'] = updated_searcher_state
                print(f"📚 Moved {len(current_search_results)} search results to history")

        browser_state = self.state.get('browser_state')
        if browser_state is not None:
            current_found_references = browser_state.get('found_references', [])
            if current_found_references:
                history = {
                    ref.url: ref
                    for ref in (browser_state.get('history_found_references') or [])
                }
                for ref in current_found_references:
                    if ref.url not in history:
                        history[ref.url] = ref
                        continue

                    # Same URL seen before: merge, keeping first-seen order.
                    from agents.schemas import Reference
                    combined_info = history[ref.url].information_list + ref.information_list
                    history[ref.url] = Reference(
                        url=ref.url,
                        information_list=list(dict.fromkeys(combined_info)),
                    )

                updated_browser_state = browser_state.copy()
                updated_browser_state['history_found_references'] = list(history.values())
                updated_browser_state['found_references'] = []
                self.state['browser_state'] = updated_browser_state
                print(f"📚 Moved {len(current_found_references)} found references to history")

    def run(self):
        """Run rounds until the answer is verified or ``MAX_ROUNDS`` is reached.

        Returns:
            The final state (the same object as ``self.state``).
        """
        round_number = 1
        # Rounds are numbered from 1, so ``<=`` allows exactly MAX_ROUNDS rounds.
        while not self.state.get('final_verified', False) and round_number <= MAX_ROUNDS:
            self._run_one_step(round_number)
            round_number += 1

        result = self.state

        # These sections may be missing or explicitly None; treat both as empty.
        searcher_state = result.get('searcher_state') or {}
        browser_state = result.get('browser_state') or {}

        print(f"Final answer: {result.get('final_answer', 'No answer found')}")
        print(f"Current summary: {result.get('current_summary', 'No summary found')}")
        print(f"Search count: {searcher_state.get('search_count', 0)}")
        print(f"Visit count: {browser_state.get('visit_count', 0)}")

        # NOTE(review): execution memory is only recorded per round in
        # _run_one_step; nothing is persisted at the end of a run.
        return result