from langchain_openai import ChatOpenAI
#from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, trim_messages
from orchestrator_maze_implementation.state.maze_state import MazeState
from orchestrator_maze_implementation.config.config_service import config
from orchestrator_maze_implementation.utils.decision_context_capture import DecisionContextCapture
from typing import Dict, Any, List
from typing_extensions import TypedDict
import json
import traceback
import re
import logging
from orchestrator_maze_implementation.agents.maze_execution_agent import debug_log
from orchestrator_maze_implementation.utils.safe_llm_invoke import safe_llm_invoke

########################################################################################
# ORCHESTRATION AGENT CORE
########################################################################################
logger = logging.getLogger("BatchRunner")

class ExplorationIssue(TypedDict):
    """Typed record describing one exploration problem attributed to an agent.

    NOTE(review): no code visible in this section constructs or reads this
    type -- confirm it is still consumed elsewhere before changing its fields.
    """
    agent_id: str
    issue_type: str  # "inefficient_exploration", "missed_opportunity", "incorrect_dead_end"
    position: List[int]  # [row, col]
    description: str
    priority: str  # "high", "medium", "low"

class AgentPerformanceSummary(TypedDict):
    """Typed record summarising one agent's recent movement behaviour.

    NOTE(review): the per-agent summaries assembled by
    OrchestrationContextBuilder.build_orchestration_context use a different
    key set ("current_position", "recent_positions", "is_stuck",
    "exploration_progress"), so this type does not describe that dict --
    confirm where, if anywhere, this shape is actually produced.
    """
    agent_id: str
    recent_moves: List[str]  # ["moved_to:(2,3)", "move_blocked", ...]
    success_rate: float
    stuck_indicator: bool
    current_position: List[int]  # presumably [row, col] like the other position fields -- TODO confirm
    last_actions: List[str]

# Define structured output using TypedDict 
class OrchestrationCorrections(TypedDict):
    """The "corrections" object of the orchestrator's JSON response.

    Both keys mirror the response format requested in the system prompt of
    maze_orchestration_agent and are also the keys of its fallback response.
    """
    remove_dead_ends: List[List[int]]  # List of [row, col] coordinates for dead ends to remove
    add_exploration_focus: List[List[int]]  # List of [row, col] coordinates to prioritize exploring

class OrchestrationResponse(TypedDict):
    """Top-level JSON object the orchestrator LLM is asked to return.

    maze_orchestration_agent annotates the result of json.loads with this
    type; the annotation is not enforced at runtime, so a parsed response may
    be missing any of these keys.
    """
    analysis: str  # Detailed analysis of current maze exploration state
    corrections: OrchestrationCorrections  # Suggested corrections for agent behavior
    guidance_for_agents: Dict[str, str]  # agent_id -> guidance text (the fallback uses the single key "all_agents")

class OrchestrationContextBuilder:
    """Assembles the context payload consumed by the maze orchestration agent."""

    @staticmethod
    def build_orchestration_context(state: MazeState) -> Dict[str, Any]:
        """Combine base maze data, per-agent summaries and decision contexts.

        Args:
            state: Current maze state; ``state["maze_wrappers"]`` is iterated
                as a mapping of agent id to maze wrapper.

        Returns:
            A dict with agent positions and summaries, dead-end analysis,
            discovered-cell and dead-end counts, the turn count, the raw
            decision contexts and a ``"global_optimization"`` section derived
            from those contexts.

        Raises:
            Exception: Any failure is logged (message plus traceback) and then
                re-raised unchanged.
        """
        try:
            # Positions, paths and dead-end bookkeeping from the wrappers.
            base = extract_orchestration_data(state)

            # What every agent currently sees / scores.
            contexts = DecisionContextCapture.aggregate_all_agent_contexts(state)

            summaries = {}
            for agent_id, wrapper in state["maze_wrappers"].items():
                history = wrapper.move_history
                # Only the five most recent entries are considered.
                window = history if len(history) <= 5 else history[-5:]

                # "Stuck" = fewer than 60% distinct entries in a window of at
                # least three moves (i.e. the agent keeps revisiting cells).
                stuck = len(window) >= 3 and len(set(window)) < len(window) * 0.6

                summary = {}
                summary["current_position"] = wrapper.get_agent_position()
                summary["recent_positions"] = window
                summary["is_stuck"] = stuck
                summary["exploration_progress"] = len(wrapper.move_history)
                summaries[agent_id] = summary

            positions = base["agent_positions"]
            dead_end_report = base["dead_end_analysis"]
            discovered_total = len(base["discovered_cells"])
            dead_end_total = 0
            for marked in base["marked_dead_ends"].values():
                dead_end_total += len(marked)
            turns = base["turn_count"]

            # Cross-agent insights derived from the decision contexts.
            cross_agent = {
                "movement_conflicts": _identify_movement_conflicts(contexts),
                "exploration_coordination": _suggest_exploration_coordination(contexts),
                "efficiency_optimization": _identify_efficiency_opportunities(contexts),
            }

            return {
                "agent_positions": positions,
                "agent_summaries": summaries,
                "dead_end_analysis": dead_end_report,
                "discovered_cells_count": discovered_total,
                "total_marked_dead_ends": dead_end_total,
                "turn_count": turns,
                "decision_contexts": contexts,
                "global_optimization": cross_agent,
            }

        except Exception as e:
            debug_log(f"ERROR in build_orchestration_context: {e}")
            print(f"Full traceback:\n{traceback.format_exc()}")
            raise
        
def _orchestration_fallback_guidance(analysis: str) -> Dict[str, Any]:
    """Return neutral guidance used when no usable LLM guidance is available."""
    return {
        "analysis": analysis,
        "corrections": {"remove_dead_ends": [], "add_exploration_focus": []},
        "guidance_for_agents": {"all_agents": "Error in parsing - Continue current strategy"},
    }


def _orchestration_response_text(result: Any) -> str:
    """Flatten an LLM result into the plain text it carries.

    Message content may be a string or a list mixing strings and content-block
    dicts. Only textual parts are kept, concatenated without a separator, so
    that non-text blocks (e.g. reasoning summaries) and dict reprs never end
    up inside the text handed to the JSON parser.
    """
    content = result.content if hasattr(result, "content") else str(result)
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for block in content:
            if isinstance(block, str):
                pieces.append(block)
            elif (isinstance(block, dict)
                  and block.get("type", "text") == "text"
                  and isinstance(block.get("text"), str)):
                pieces.append(block["text"])
        return "".join(pieces)
    return str(content)


def _orchestration_parse_guidance(raw: str) -> Dict[str, Any]:
    """Parse the orchestrator's JSON reply, falling back to neutral guidance.

    Accepts a bare JSON object, one wrapped in a Markdown code fence (with or
    without the ``json`` tag), or one surrounded by extra prose. Anything that
    does not decode to a JSON object yields the fallback response.
    """
    text = raw.strip()
    if text.startswith("```"):
        text = text[3:]
        if text[:4].lower() == "json":
            text = text[4:]
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()

    # Second attempt: the outermost {...} span, for replies that wrap the
    # object in explanatory text.
    candidates = [text]
    first, last = text.find("{"), text.rfind("}")
    if first != -1 and last > first and text[first:last + 1] != text:
        candidates.append(text[first:last + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError as e:
            print(f"DEBUG ORCHESTRATION AGENT: JSON parse error: {e}")
            continue
        if isinstance(parsed, dict):
            print(f"DEBUG ORCHESTRATION AGENT: Successfully parsed JSON: {parsed}")
            return parsed
        print(f"DEBUG ORCHESTRATION AGENT: JSON parse error: expected an object, got {type(parsed).__name__}")

    return _orchestration_fallback_guidance("Unable to parse LLM response")


def _orchestration_find_winner(state: MazeState) -> Any:
    """Return the id of the first agent whose wrapper reports the exit, else None."""
    return next(
        (agent_id for agent_id, wrapper in state["maze_wrappers"].items()
         if wrapper.is_at_exit()),
        None
    )


def _orchestration_state_update(guidance: Dict[str, Any],
                                orchestration_data: Dict[str, Any],
                                winning_agent: Any) -> Dict[str, Any]:
    """Build the state update returned by the orchestration node."""
    update: Dict[str, Any] = {"maze_exit_found": winning_agent is not None}
    if winning_agent is not None:
        # The key is only present when an exit was actually reached.
        update["winning_agent"] = winning_agent
    update["orchestrator_guidance"] = guidance.get("guidance_for_agents")
    update["shared_knowledge"] = {
        "analysis": guidance.get("analysis", ""),
        "corrections": guidance.get("corrections", {}),
        "orchestration_data": orchestration_data
    }
    update["step_index"] = 0
    update["turn_complete"] = False
    update["agent_backtracking_state"] = {}
    return update


def maze_orchestration_agent(state: MazeState) -> Dict[str, Any]:
    """
    Orchestration agent that analyzes maze progress and provides strategic guidance.
    Reviews agent performance, validates dead-end markings, and suggests improvements.

    Args:
        state: Current maze state; ``state["maze_wrappers"]`` maps agent ids to
            their maze wrappers.

    Returns:
        A state update with ``maze_exit_found``, ``orchestrator_guidance``,
        ``shared_knowledge`` (analysis, corrections, orchestration data),
        ``step_index``, ``turn_complete`` and ``agent_backtracking_state``,
        plus ``winning_agent`` when an agent is at the exit. If the LLM call
        or response handling fails, the same shape is returned with neutral
        fallback guidance instead of raising.

    Raises:
        Exception: Failures while building the orchestration context or
            serialising it into the prompt propagate to the caller.
    """

    # Extract orchestration data from maze wrappers
    orchestration_data = OrchestrationContextBuilder.build_orchestration_context(state)    

    # Build structured context message for current analysis
    context_message = HumanMessage(
        content=f"""Task: Find maze exit. Current Maze Exploration Analysis:
        
Orchestration Data: {json.dumps(orchestration_data, indent=2)}

ENHANCED DECISION INTELLIGENCE:
- Movement Conflicts: {orchestration_data.get('global_optimization', {}).get('movement_conflicts', [])}
- Exploration Coordination: {orchestration_data.get('global_optimization', {}).get('exploration_coordination', [])}
- Efficiency Optimization: {orchestration_data.get('global_optimization', {}).get('efficiency_optimization', [])}

FOCUS AREAS:
- Dead end validation accuracy: {orchestration_data.get('dead_end_analysis', {})}
- Agent coordination: {orchestration_data.get('agent_summaries', {})}
- Exploration coverage: {orchestration_data.get('discovered_cells_count', 0)} cells discovered
- Real-time Decision Context Available: {len(orchestration_data.get('decision_contexts', {}).get('individual_contexts', {}))} agents
        """)

    prompt = ChatPromptTemplate.from_messages([
        ("system", """
                You are a Maze Strategy Orchestrator focused on tactical coordination with REAL-TIME DECISION AWARENESS..
         
        === COORDINATE SYSTEM ===
        The maze is a grid: 'W' (Wall), 'O' (Open Path), 'E' (Exit).
        IMPORTANT: All positions use MATRIX notation (row, col), NOT Cartesian (x, y):
        - Position (3, 5) means "row 3, column 5"
        - NORTH decreases row number (moves UP in visual display)
        - SOUTH increases row number (moves DOWN in visual display)  
        - EAST increases column number (moves RIGHT in visual display)
        - WEST decreases column number (moves LEFT in visual display)
        - The maze is displayed like a matrix/spreadsheet, NOT a Cartesian graph
        
        === YOUR CORE CAPABILITIES ===
        You now have access to:
        1. **Real-time Decision Contexts**: What each agent sees and their movement scores
        2. **Movement Conflicts**: When individual efficiency penalties block globally optimal moves  
        3. **Coordination Opportunities**: When agents could work together better
        4. **Global Optimization Patterns**: Cross-agent insights individual agents can't see

        === STRATEGIC AMPLIFICATION RESPONSIBILITIES ===
        
        **Movement Conflict Resolution**:
        When you see movement_conflicts in the data:
        - Agent efficiency penalties blocking globally valuable exploration
        - Provide override recommendations: "Agent X: Override efficiency penalty and explore [direction] - global exploration priority"
        - Identify when local optimization (avoiding recent positions) conflicts with global needs
        
        **Exploration Coordination**:
        When you see exploration coordination opportunities:  
        - Multiple agents targeting same unexplored areas (inefficient duplication)
        - Provide coordination recommendations: "Agent X explore north, Agent Y explore south to maximize coverage"
        - Suggest strategic division of unexplored territories
        
        **Global Pattern Recognition**:
        When you see global patterns that individual agents miss:
        - Convergence zones where multiple agents are heading (potential bottleneck)
        - Exploration gaps no individual agent is prioritizing (missed opportunities)
        - Strategic redirection: Guide agents to unexplored high-value areas
        
        **Efficiency Optimization**:
        When efficiency weights are too restrictive:
        - Identify when efficiency_weight > 1.5 is blocking valid exploration
        - Recommend temporary efficiency relaxation for strategic exploration
        - Balance local efficiency with global exploration needs
        
        === YOUR ENHANCED ROLE ===
        You are a STRATEGIC AMPLIFIER:
        1. **Validate Agent Decisions**: Use real decision context to understand agent choices
        2. **Identify Global vs Local Conflicts**: When local optimization conflicts with global needs  
        3. **Provide Surgical Interventions**: Break local minima with specific, actionable guidance
        4. **Coordinate Multi-Agent Exploration**: Maximize coverage through strategic coordination
        5. **Override Local Constraints**: When global strategy requires overriding local efficiency rules
        
        === CORE RESPONSIBILITIES ===
        1. Analyze the provided MazeState (turn count, agent positions, paths, visited cells, discovered cells, marked dead ends, AND real-time decision contexts).
        2. Identify strategic issues:
            * **Validate Dead Ends**: Check if marked dead ends are truly dead ends by examining discovered_cells
            * **Movement Conflicts**: Identify when efficiency penalties block globally optimal exploration
            * **Coordination Gaps**: Find where agents are duplicating effort or missing coordination opportunities
            * **Local Minima**: Detect when agents are stuck due to local optimization constraints
        3. Provide strategic corrections based on real decision context:
            * 'remove_dead_ends': Coordinates for incorrect dead end markings
            * 'add_exploration_focus': High-priority unexplored areas with coordination guidance
        4. Give decision-aware guidance that amplifies agent intelligence rather than conflicts with it.
        
        Format your response as a JSON object:
        {{
            "analysis": "Your detailed analysis including real-time decision insights, movement conflicts, and coordination opportunities.",
            "corrections": {{
                "remove_dead_ends": [ [x1,y1], [x2,y2] ], // List of [row, col] coordinates
                "add_exploration_focus": [ [x1,y1], [x2,y2] ] // List of [row, col] coordinates for strategic exploration
            }},
            "guidance_for_agents": {{
                "agent_0": "Specific guidance message for agent_0 based on their real decision context",
                "agent_1": "Specific guidance message for agent_1 based on their real decision context"
            }}
        }}
        
        IMPORTANT: 
        - Use the real-time decision context to provide informed guidance
        - Address specific movement conflicts and coordination opportunities
        - The 'guidance_for_agents' field must be a JSON object/dictionary where each key is an agent_id and each value is strategic guidance based on their actual decision context
        - Focus on amplifying agent intelligence, not overriding it blindly

        """),
        ("human", "{context}")
        ])

    # Determined inside the try block, but kept here so the error path can
    # still report an exit that was detected before the failure.
    winning_agent = None

    try:
        winning_agent = _orchestration_find_winner(state)

        # The reasoning options must match the model that is actually
        # instantiated, so the check uses the orchestration model name.
        model_name = config.get_orchestration_model()
        if model_name.startswith("gpt-5"):
            reasoning = {"effort": "minimal", "summary": "auto"}
            llm = ChatOpenAI(model=model_name, temperature=config.get_reasoning_temperature(), reasoning=reasoning, use_previous_response_id=True, verbose=True)
        else:
            llm = ChatOpenAI(model=model_name, temperature=config.get_execution_temperature(), verbose=True)
        debug_log("LLM initialized", 261)

        # Build message chain: system prompt + current analysis
        messages = prompt.format_messages(context=context_message.content)

        result = safe_llm_invoke(llm, messages, logger=logger)
        print(f"DEBUG ORCHESTRATION AGENT: Result: {result}")

        # Parse the orchestration response (never raises on malformed JSON)
        guidance = _orchestration_parse_guidance(_orchestration_response_text(result))

        return _orchestration_state_update(guidance, orchestration_data, winning_agent)

    except Exception as e:
        # Best-effort node: keep the graph running with neutral guidance,
        # delivered under the same key as on the success path.
        print(f"Error in orchestration: {e}")
        return _orchestration_state_update(
            _orchestration_fallback_guidance("Orchestration error occurred"),
            orchestration_data,
            winning_agent,
        )

########################################################################################
# UTILITY FUNCTIONS
########################################################################################

def _identify_movement_conflicts(decision_contexts: Dict) -> List[Dict]:
    """Flag moves where an efficiency penalty suppresses exploration.

    For every agent context without an ``"error"`` key, each scored direction
    is reported when its score is below 0.5, the cell it leads to is listed in
    the agent's ``efficiency_penalties`` and the direction is listed in its
    ``unexplored_directions``.

    Args:
        decision_contexts: Mapping whose ``"individual_contexts"`` entry maps
            agent ids to per-agent context dicts.

    Returns:
        One dict per flagged move (agent id, position, blocked direction,
        score, reason and a recommendation string).
    """
    found = []

    per_agent = decision_contexts.get("individual_contexts", {})
    for agent_id, ctx in per_agent.items():
        if "error" in ctx:
            continue

        scores = ctx.get("movement_scores", {})
        penalised_cells = ctx.get("efficiency_penalties", [])
        origin = ctx.get("current_position")
        unexplored = ctx.get("unexplored_directions", [])

        for direction, score in scores.items():
            # Imported lazily (only once there is something to score) to
            # avoid a circular import with the execution agent module.
            from orchestrator_maze_implementation.agents.maze_execution_agent import _get_next_position
            target = _get_next_position(origin, direction)

            # All three conditions must hold for a conflict.
            if not (score < 0.5):
                continue
            if target not in penalised_cells:
                continue
            if direction not in unexplored:
                continue

            entry = {}
            entry["agent_id"] = agent_id
            entry["position"] = origin
            entry["blocked_direction"] = direction
            entry["score"] = score
            entry["reason"] = "efficiency_penalty_blocking_exploration"
            entry["recommendation"] = f"Override efficiency penalty for {direction} direction"
            found.append(entry)

    return found

def _suggest_exploration_coordination(decision_contexts: Dict) -> List[Dict]:
    """Suggest coordination opportunities between nearby agents.

    Two agents are paired when both have a position, their Manhattan distance
    is at most 4 and they share more than one unexplored direction. Each pair
    is reported once (ids compared with ``<``).

    Args:
        decision_contexts: Mapping whose ``"individual_contexts"`` entry maps
            agent ids to per-agent context dicts; contexts containing an
            ``"error"`` key are ignored.

    Returns:
        One dict per opportunity with the agent pair, coordination type, the
        shared directions (sorted), the distance and a recommendation string.
    """
    opportunities = []

    individual_contexts = decision_contexts.get("individual_contexts", {})

    # Positions of all agents whose context was captured successfully.
    agent_positions = {
        agent_id: context.get("current_position")
        for agent_id, context in individual_contexts.items()
        if "error" not in context
    }

    for agent1, pos1 in agent_positions.items():
        for agent2, pos2 in agent_positions.items():
            if not agent1 < agent2:  # Report each unordered pair once
                continue
            if not (pos1 and pos2):
                continue

            distance = abs(pos1[0] - pos2[0]) + abs(pos1[1] - pos2[1])
            if distance > 4:  # Only nearby agents are worth coordinating
                continue

            unexplored1 = set(individual_contexts[agent1].get("unexplored_directions") or ())
            unexplored2 = set(individual_contexts[agent2].get("unexplored_directions") or ())

            # Sorted so the reported list and the recommended direction do
            # not depend on set iteration order (which varies between runs).
            overlap = sorted(unexplored1 & unexplored2)
            if len(overlap) <= 1:
                continue

            opportunities.append({
                "agents": [agent1, agent2],
                "coordination_type": "exploration_division",
                "overlapping_directions": overlap,
                "distance": distance,
                "recommendation": f"Coordinate exploration: {agent1} take {overlap[0]}, {agent2} explore different direction"
            })

    return opportunities

def _identify_efficiency_opportunities(decision_contexts: Dict) -> List[Dict]:
    """Report agents whose high efficiency weight suppresses exploration.

    An agent is reported when its ``efficiency_weight`` (default 1.0) exceeds
    1.5 and at least one unexplored direction has a movement score below 0.7.

    Args:
        decision_contexts: Mapping whose ``"individual_contexts"`` entry maps
            agent ids to per-agent context dicts; contexts containing an
            ``"error"`` key are skipped.

    Returns:
        One dict per reported agent with its id, efficiency weight, the
        affected directions and a recommendation naming the first of them.
    """
    suggestions = []

    per_agent = decision_contexts.get("individual_contexts", {})
    for agent_id, ctx in per_agent.items():
        if "error" in ctx:
            continue

        efficiency_weight = ctx.get("dynamic_weights", {}).get("efficiency_weight", 1.0)
        if not (efficiency_weight > 1.5):
            # Weight is not high enough to be considered restrictive.
            continue

        scores = ctx.get("movement_scores", {})
        unexplored = ctx.get("unexplored_directions", [])
        suppressed = [
            direction
            for direction, score in scores.items()
            if score < 0.7 and direction in unexplored
        ]
        if not suppressed:
            continue

        suggestions.append({
            "agent_id": agent_id,
            "efficiency_weight": efficiency_weight,
            "blocked_directions": suppressed,
            "recommendation": f"Consider reducing efficiency penalty to enable exploration of {suppressed[0]}"
        })

    return suggestions

def extract_orchestration_data(state: MazeState) -> Dict[str, Any]:
    """Extract key data needed for orchestration analysis.

    The discovered-cell map is built from every agent's move history first;
    dead-end markings are validated afterwards. Validation therefore sees the
    cells discovered by all agents and does not depend on the order in which
    agents appear in ``state["maze_wrappers"]``.

    Args:
        state: Maze state providing ``maze_wrappers`` (agent id -> wrapper),
            ``turn_count``, ``step_index``, ``plan`` and ``plan_completed``.

    Returns:
        A dict with ``agent_positions``, ``previously_visited_tiles``,
        ``discovered_cells`` (``"row,col"`` -> cell type string),
        ``marked_dead_ends``, ``dead_end_analysis``, ``turn_count``,
        ``current_step``, ``plan`` and ``plan_completed``.
    """
    wrappers = state["maze_wrappers"]

    agent_positions = {}
    previously_visited_tiles = {}
    discovered_cells = {}
    marked_dead_ends = {}

    # Pass 1: collect per-agent data and the shared map of discovered cells.
    for agent_id, wrapper in wrappers.items():
        agent_positions[agent_id] = wrapper.get_agent_position()
        previously_visited_tiles[agent_id] = wrapper.move_history.copy()
        marked_dead_ends[agent_id] = list(wrapper.get_marked_dead_ends())

        for pos in wrapper.move_history:
            # str() also turns numpy string scalars into plain Python strings.
            discovered_cells[f"{pos[0]},{pos[1]}"] = str(wrapper.get_cell_type(pos))

    # Pass 2: validate each agent's dead-end markings against the full map.
    dead_end_analysis = {}
    for agent_id, wrapper in wrappers.items():
        # Normalised to tuples so membership works whether the history holds
        # tuples or lists, and is a constant-time lookup.
        visited = {(pos[0], pos[1]) for pos in wrapper.move_history}

        incorrect_dead_ends = []
        for marked_pos in marked_dead_ends[agent_id]:
            row, col = marked_pos
            neighbours = ((row - 1, col), (row + 1, col), (row, col - 1), (row, col + 1))

            # A marking is suspect when a neighbouring cell is known to be
            # open or the exit but this agent has never stepped on it.
            if any(
                discovered_cells.get(f"{adj_row},{adj_col}") in ('O', 'E')
                and (adj_row, adj_col) not in visited
                for adj_row, adj_col in neighbours
            ):
                incorrect_dead_ends.append([row, col])

        marked_count = len(marked_dead_ends[agent_id])
        dead_end_analysis[agent_id] = {
            "marked_count": marked_count,
            "incorrect_markings": incorrect_dead_ends,
            # Percentage of markings not flagged; 0.0 when nothing is marked.
            "accuracy": (marked_count - len(incorrect_dead_ends)) / max(1, marked_count) * 100
        }

    return {
        "agent_positions": agent_positions,
        "previously_visited_tiles": previously_visited_tiles,
        "discovered_cells": discovered_cells,
        "marked_dead_ends": marked_dead_ends,
        "dead_end_analysis": dead_end_analysis,
        "turn_count": state["turn_count"],
        "current_step": state["step_index"],
        "plan": state["plan"],
        "plan_completed": state["plan_completed"]
    }
