import traceback
import sys
from typing import Tuple, Dict, List, Any
import logging
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage, trim_messages
from orchestrator_maze_implementation.utils.safe_llm_invoke import safe_llm_invoke
from orchestrator_maze_implementation.config.config_service import config
from orchestrator_maze_implementation.visualization.maze_visualization import display_maze_state
from orchestrator_maze_implementation.state.maze_state import MazeState
from orchestrator_maze_implementation.agents.benchmarking_node import benchmarking_node
from orchestrator_maze_implementation.tools.basic_tool_functions import (
    move_north, move_south, move_east, move_west, 
    mark_dead_end, get_current_view, start_backtracking,
)
from orchestrator_maze_implementation.tools.tool_parsers import MazeStateExtractor, MazeViewParser, MazeMoveParser
from orchestrator_maze_implementation.utils.dynamic_prompt_injector import (
    generate_dynamic_execution_prompts, generate_dynamic_execution_weights
)

# Tool callables handed to the LLM via bind_tools(): the four directional
# moves followed by the dead-end, observation and backtracking helpers.
MazeTools = [
    move_north,
    move_south,
    move_east,
    move_west,
    mark_dead_end,
    get_current_view,
    start_backtracking,
]

# Module logger; uses the fixed name "BatchRunner" rather than __name__.
logger = logging.getLogger("BatchRunner")

########################################################################################
# DEBUG UTILITIES
########################################################################################

def debug_log(message: str, line_number: int = None):
    """Print a debug message tagged with a source line number.

    Args:
        message: Text to print.
        line_number: Line number to show. Any falsy value (``None`` or ``0``)
            means "use the line of the caller", read from the caller's frame.
    """
    # Frame 1 is whoever called debug_log; only inspected when no explicit
    # (truthy) line number was supplied.
    shown_line = line_number if line_number else sys._getframe(1).f_lineno
    print(f"DEBUG LINE {shown_line}: {message}")

def safe_iterate(obj, context: str = "unknown"):
    """Return ``obj`` unchanged, or fail loudly when it is ``None``.

    Args:
        obj: Value the caller is about to iterate or measure.
        context: Label included in the error message.

    Returns:
        ``obj`` itself when it is not ``None``.

    Raises:
        TypeError: If ``obj`` is ``None``. The message carries the caller's
            line number and ``context``; the same text is printed first.
    """
    if obj is not None:
        return obj

    # Frame 1 is the caller, so the reported line is where the None was used.
    caller_line = sys._getframe(1).f_lineno
    failure = TypeError(f"LINE {caller_line}: Cannot iterate over None in context: {context}")
    print(f"ERROR LINE {caller_line}: {failure}")
    raise failure

def safe_get(obj, key, default=None, context: str = "unknown"):
    """Look up ``key`` on a mapping-like or plain object without raising.

    Objects exposing a ``get`` attribute are queried with ``obj.get(key,
    default)``; anything else is read with ``getattr``. Every call prints a
    diagnostic line tagged with the caller's line number.

    Args:
        obj: Mapping or object to read from; ``None`` yields ``default``.
        key: Dictionary key or attribute name.
        default: Value returned when the lookup misses or fails.
        context: Label used in the printed diagnostics.

    Returns:
        The looked-up value, or ``default`` on ``None`` input, a miss, or any
        exception raised during the lookup or while printing the result.
    """
    try:
        caller_line = sys._getframe(1).f_lineno
        if obj is None:
            print(f"WARNING LINE {caller_line}: Accessing {key} on None object in context: {context}")
            return default

        value = obj.get(key, default) if hasattr(obj, 'get') else getattr(obj, key, default)

        print(f"DEBUG LINE {caller_line}: safe_get({context}.{key}) = {type(value)} {value}")
        return value
    except Exception as exc:
        # Best-effort accessor: report the problem and fall back to default.
        print(f"ERROR LINE {sys._getframe(1).f_lineno}: Error accessing {key} in {context}: {exc}")
        return default

########################################################################################
# Message Parsing Methods
########################################################################################

class AgentContextBuilder:
    """Build token-efficient context for maze execution agent.

    Both helpers are static. Each wraps its work in a broad ``except`` that
    logs the failure (with traceback) and returns a fallback value instead of
    propagating the exception.
    """

    @staticmethod
    def build_execution_context(state: MazeState, agent_id: str, strategic_waypoints: List = None) -> tuple[str, Dict[str, float]]:
        """Build the minimal, structured prompt context for the current execution step.

        Args:
            state: Shared maze state; all fields are read through ``safe_get``.
            agent_id: Key used to pick this agent's entries out of the
                per-agent collections in ``state``.
            strategic_waypoints: Teammate positions listed under the
                multi-agent section of the prompt; ``None`` means none.

        Returns:
            ``(context, weights)``: the prompt text and the weights mapping
            returned by ``_generate_dynamic_behavioral_prompts``. If anything
            raises, ``("Error in context building", default_weights)``.
        """
        if strategic_waypoints is None:
            strategic_waypoints = []
        try:
            # debug_log derives the real caller line itself; hard-coded
            # numbers drift as soon as the file is edited.
            debug_log(f"build_execution_context called with agent_id: {agent_id}")

            env_input = config.env_input() #rename to tool_usage_only
            debug_log(f"God mode: {env_input}")

            if env_input:
                current_state = MazeStateExtractor.extract_current_state(state, agent_id)
                debug_log(f"God mode current_state type: {type(current_state)}")
            else:
                current_state = state
                debug_log(f"Regular mode current_state type: {type(current_state)}")

            step_index = safe_get(state, "step_index", 0, "state")
            plan = safe_get(state, "plan", [], "state")
            debug_log(f"step_index: {step_index}, plan length: {len(plan) if plan else 'None'}")

            if step_index < len(safe_iterate(plan, "state.plan")):
                current_step = plan[step_index]
            else:
                current_step = "Complete"
            debug_log(f"current_step: {current_step}")

            # Get agent state with safe access
            previously_visited_tiles = safe_get(current_state, "visited_positions", [], "current_state")
            debug_log(f"previously_visited_tiles type: {type(previously_visited_tiles)}, value: {previously_visited_tiles}")

            known_openings_dict = safe_get(state, "known_openings", {}, "state")
            known_openings = safe_get(known_openings_dict, agent_id, [], "known_openings") if known_openings_dict else []
            debug_log(f"known_openings: {known_openings}")

            # Get backtracking state with safe access
            backtracking_state = safe_get(state, "agent_backtracking_state", {}, "state")
            agent_backtrack = safe_get(backtracking_state, agent_id, {}, "backtracking_state") if backtracking_state else {}
            is_backtracking = safe_get(agent_backtrack, "is_backtracking", False, "agent_backtrack")
            lock_mode = safe_get(agent_backtrack, "lock_mode", False, "agent_backtrack")
            debug_log(f"is_backtracking: {is_backtracking}, lock_mode: {lock_mode}")

            # Get orchestrator guidance with safe access
            orchestrator_guidance = safe_get(state, "orchestrator_guidance", {}, "state")
            if isinstance(orchestrator_guidance, dict):
                agent_guidance = safe_get(orchestrator_guidance, agent_id, "", "orchestrator_guidance")
            else:
                agent_guidance = ""
            debug_log(f"agent_guidance: {agent_guidance}")

            # Safe access to current state fields
            current_position = safe_get(current_state, "current_position", "Unknown", "current_state")
            possible_moves = safe_get(current_state, "possible_moves", [], "current_state")
            marked_dead_ends = safe_get(current_state, "marked_dead_ends", [], "current_state")
            agent_unexplored_directions = _calculate_unexplored_directions(current_position, possible_moves, previously_visited_tiles)
            debug_log(f"current_position: {current_position}, possible_moves: {possible_moves}, agent_unexplored_directions: {agent_unexplored_directions}")

            # Ablation switches are read once; a missing section behaves like
            # an empty one so each flag falls back to its own default.
            ablation_config = config.get('ablations') or {}

            # Recent-position information used to discourage immediate
            # backtracking and overlap with teammates.
            maze_wrappers = safe_get(state, "maze_wrappers", {}, "state") or {}
            recent_positions = []
            previous_position = None
            recent_other_positions = []
            if agent_id in maze_wrappers:
                move_history = getattr(maze_wrappers[agent_id], 'move_history', None) or []
                debug_log(f"move_history type: {type(move_history)}, value: {move_history}")

                # Last six recorded positions, and the single entry before the
                # most recent one (an empty slice when there is none).
                recent_positions = move_history[-6:]
                previous_position = move_history[-2:-1]

                # Multi-agent awareness - collect other agents' visited positions
                if ablation_config.get("enable_teammate_coordination", True):
                    other_agents_positions = set()
                    all_agents = safe_get(state, "all_agents", [], "state") or []

                    for other_agent_id in all_agents:
                        if other_agent_id != agent_id and other_agent_id in maze_wrappers:
                            other_history = getattr(maze_wrappers[other_agent_id], 'move_history', None) or []
                            other_agents_positions.update(other_history)

                    # NOTE(review): going through a set drops ordering, so the
                    # "last 10" slice in the prompt is an arbitrary subset of
                    # teammate positions rather than the most recent ones.
                    recent_other_positions = list(other_agents_positions)

                    debug_log(f"Recent positions: {recent_positions}, avoid: {previous_position}")
                    debug_log(f"Other agents recent positions: {recent_other_positions}")
                else:
                    debug_log("Teammate coordination disabled - no position avoidance")

            # Generate dynamic prompts and weights based on free energy metrics
            dynamic_prompts, weights = _generate_dynamic_behavioral_prompts(state, agent_id)

            # Skip movement analysis during lock mode to avoid LLM confusion
            if lock_mode:
                # In lock mode, don't calculate scores to avoid conflicting guidance
                movement_guidance = "🔒 LOCK MODE: Movement analysis disabled - follow lock instructions only"
                score_details = "N/A (Lock mode active)"
                dead_end_confidence = 0.0  # Not needed in lock mode
            else:
                # Calculate movement scores for each possible direction
                movement_scores = _calculate_movement_scores(
                    current_position, possible_moves, previously_visited_tiles,
                    recent_positions, recent_other_positions, weights, marked_dead_ends
                )

                # Calculate dead end confidence
                dead_end_confidence = _calculate_dead_end_confidence(
                    current_position, possible_moves, previously_visited_tiles
                )
                if not ablation_config.get('enable_reward_function'):
                    # Reward function ablated: leave the analysis lines blank.
                    movement_guidance = ""
                    score_details = ""
                elif movement_scores:
                    best_direction = max(movement_scores, key=movement_scores.get)
                    best_score = movement_scores[best_direction]

                    # Check if should backtrack (but only if not already backtracking)
                    should_backtrack = best_score < weights.get('backtrack_threshold', 0.7) and not is_backtracking

                    movement_guidance = f"RECOMMENDED: {best_direction} (score: {best_score:.2f})"
                    if should_backtrack:
                        movement_guidance += " | BACKTRACK ADVISED: All scores below threshold"
                    elif is_backtracking:
                        movement_guidance += " | BACKTRACKING ACTIVE: Follow backtrack path only"

                    # Format all scores for display
                    score_details = ", ".join(
                        f"{direction}: {score:.2f}" for direction, score in sorted(movement_scores.items())
                    )
                else:
                    movement_guidance = "No valid moves available"
                    score_details = "None"

            context = f"""EXECUTION CONTEXT - STEP {step_index + 1}:
            CURRENT STEP: {current_step}

            CURRENT STATE:
            - Position: {current_position}
            - Available moves: {possible_moves}
            - Current unexplored directions: {agent_unexplored_directions}
            - Known unexplored openings: {known_openings}
            {_format_dynamic_modifiers(dynamic_prompts)}

            WEIGHTED MOVEMENT ANALYSIS:
            - {movement_guidance}
            - All direction scores: {score_details}
            - Backtrack threshold: {weights.get('backtrack_threshold', 0.7):.2f}
            - Dead end confidence: {dead_end_confidence:.2f} (threshold: {weights.get('dead_end_confidence', 0.8):.2f})

            BACKTRACKING STATUS:
            - Currently backtracking: {"YES" if is_backtracking else "NO"}
            - Lock mode active: {"YES" if lock_mode else "NO"}
            - WARNING: Do NOT call start_backtracking() if already backtracking

            EXPLORATION STATUS:
            - Previously visited: {previously_visited_tiles}
            - Dead ends marked: {len(marked_dead_ends) if marked_dead_ends else 0}
            - Avoid backtracking to recent path, unless other rules apply: {recent_positions}

            OSCILLATION CHECK:
            - Recent movement pattern: {recent_positions}
            - WARNING: If current position appears more than 2 times in recent pattern, call start_backtracking()

            MULTI-AGENT COORDINATION:
            - AVOID returning to your previous position: {previous_position if previous_position else "None (no previous position)"}
            - Teammate recent positions (last 10, avoid when alternatives exist): {recent_other_positions[-10:]}
            - Teammate explored junctions/dead ends (avoid when alternatives exist): {strategic_waypoints}

            GUIDANCE:
            - Orchestrator: {agent_guidance}

            PERFORMANCE WEIGHTS:
            - Exploration weight: {weights.get('exploration_weight', 1.0):.1f}
            - Efficiency weight: {weights.get('efficiency_weight', 1.0):.1f}
            - Backtrack threshold: {weights.get('backtrack_threshold', 0.7):.1f}
            - Dead end confidence: {weights.get('dead_end_confidence', 0.8):.1f}

            """ + _get_backtracking_lock_guidance(agent_backtrack, current_position)
            debug_log("build_execution_context completed successfully")
            return context, weights

        except Exception as e:
            # Best-effort: the caller still gets a (degraded) prompt and a
            # neutral set of weights rather than an exception.
            debug_log(f"ERROR in build_execution_context: {e}")
            print(f"Full traceback:\n{traceback.format_exc()}")
            default_weights = {
                "exploration_weight": 1.0,
                "efficiency_weight": 1.0,
                "teammate_avoidance": 1.0,
                "backtrack_threshold": 0.7,
                "dead_end_confidence": 0.8
            }
            return "Error in context building", default_weights

    @staticmethod
    def extract_last_action_result(agent_messages: List) -> Dict:
        """Extract the result of the last action for decision making.

        Only the final message is inspected, and only its ``name`` attribute
        decides how the content is parsed.

        Args:
            agent_messages: The agent's message history (may be empty/None).

        Returns:
            A dict with ``"type"`` and ``"data"`` keys. ``type`` is one of
            ``"none"`` (no messages), ``"view"``, ``"move"``, ``"dead_end"``,
            ``"no recent tool calls"`` (last message is not a recognised tool
            result) or ``"error"`` (an exception was caught; ``data`` then
            holds ``{"error": <message>}``).
        """
        try:
            debug_log(f"extract_last_action_result called with {len(agent_messages) if agent_messages else 'None'} messages")

            if not agent_messages:
                debug_log("No agent messages, returning none type")
                return {"type": "none", "data": {}}

            last_msg = agent_messages[-1]
            debug_log(f"Last message type: {type(last_msg)}, has name: {hasattr(last_msg, 'name')}")

            # Check if it's a tool message (result of action)
            tool_name = getattr(last_msg, 'name', None)
            if tool_name is not None:
                debug_log(f"Last message name: {tool_name}")
                if tool_name == 'get_current_view':
                    return {
                        "type": "view",
                        "data": MazeViewParser().parse(last_msg.content)
                    }
                elif 'move_' in tool_name:
                    return {
                        "type": "move",
                        "data": MazeMoveParser().parse(last_msg.content)
                    }
                elif 'dead_end' in tool_name:
                    return {
                        "type": "dead_end",
                        "data": MazeMoveParser().parse(last_msg.content)
                    }
            else:
                debug_log(f"Last message has no name or name is None: {getattr(last_msg, 'name', 'NO_NAME_ATTR')}")

            debug_log("No recognised tool result, returning 'no recent tool calls'")
            return {"type": "no recent tool calls", "data": {}}

        except Exception as e:
            debug_log(f"ERROR in extract_last_action_result: {e}")
            print(f"Full traceback:\n{traceback.format_exc()}")
            return {"type": "error", "data": {"error": str(e)}}

########################################################################################
# Agent Core Design
########################################################################################

def maze_execution_agent(state: MazeState):
    """
    Maze-solving agent that uses the LLM with tools - just the model part.

    Runs one execution step for ``state["current_agent"]``: renders the maze,
    optionally runs the benchmarking node, builds the prompt, invokes the
    tool-bound LLM once and folds the reply back into a partial state update.

    Args:
        state: Shared maze state; fields are read through ``safe_get``.

    Returns:
        A partial state dict, one of:
        - plan exhausted: ``turn_complete`` and an empty
          ``agent_backtracking_state``;
        - victory detected in the reply text: the dict from ``set_victory``;
        - otherwise: ``agent_messages``, ``step_index`` (incremented),
          ``known_openings`` and ``agent_backtracking_state``.
        The first and last variants also carry ``entropy_history`` and
        ``free_energy_metrics`` when the benchmarking node produced data.

    Raises:
        KeyError: If the current agent has no entry in ``maze_wrappers``.
        Exception: Any other failure is logged once with its traceback and
            re-raised unchanged.
    """
    try:
        # debug_log works out the real caller line on its own, so no
        # hand-maintained line numbers are passed.
        debug_log("maze_execution_agent started")

        agent_id = safe_get(state, "current_agent", "unknown", "state")
        turn_count = safe_get(state, "turn_count", 0, "state")
        step_index = safe_get(state, "step_index", 0, "state")
        debug_log(f"Agent: {agent_id}, Turn: {turn_count}, Step: {step_index}")

        print(f"\n🤖 {agent_id} taking turn (Turn {turn_count}, Step {step_index})")

        # Rendering is best-effort; a display failure must not stop the turn.
        try:
            display_maze_state(state)
            debug_log("display_maze_state completed")
        except Exception as e:
            debug_log(f"Error in display_maze_state: {e}")

        # Get the current agent's maze wrapper
        maze_wrappers = safe_get(state, "maze_wrappers", {}, "state")
        debug_log(f"maze_wrappers type: {type(maze_wrappers)}, keys: {list(maze_wrappers.keys()) if maze_wrappers else 'None'}")

        if not maze_wrappers or agent_id not in maze_wrappers:
            raise KeyError(f"Agent {agent_id} not found in maze_wrappers")

        maze_wrapper = maze_wrappers[agent_id]
        debug_log(f"maze_wrapper type: {type(maze_wrapper)}")

        agent_messages_dict = safe_get(state, "agent_messages", {}, "state")
        agent_messages = safe_get(agent_messages_dict, agent_id, [], "agent_messages") if agent_messages_dict else []
        debug_log(f"agent_messages length: {len(agent_messages) if agent_messages else 'None'}")

        # Ablation switches, read once; a missing section acts as empty so
        # every flag falls back to its own default.
        ablation_config = config.get('ablations') or {}

        # Benchmark Node with Ablations Switch
        benchmark_data = {}
        if ablation_config.get("enable_dynamic_weights"):
            try:
                benchmark_data = benchmarking_node(state)
                debug_log(f"benchmark_data type: {type(benchmark_data)}")
            except Exception as e:
                debug_log(f"Error in benchmarking_node: {e}")
                benchmark_data = {}

        def _attach_benchmark(update):
            """Add benchmark metrics to ``update`` when any were produced."""
            if benchmark_data:
                update.update({
                    "entropy_history": benchmark_data.get("entropy_history", []),
                    "free_energy_metrics": benchmark_data.get("free_energy_metrics", {}),
                })
            return update

        plan = safe_get(state, "plan", [], "state")
        debug_log(f"plan length: {len(plan) if plan else 'None'}, step_index: {step_index}")

        if step_index >= len(safe_iterate(plan, "state.plan")):
            debug_log("Plan completed, setting turn_complete=True")
            return_state = _attach_benchmark({
                "turn_complete": True,
                "agent_backtracking_state": {}
            })
            debug_log(f"Returning state with turn_complete=True: {return_state}")
            return return_state

        debug_log("Starting agent execution logic")
        strategic_waypoints = []
        if ablation_config.get("enable_teammate_coordination", True):
            teammate_strategic_waypoints = set()
            all_agents = safe_get(state, "all_agents", [], "state") or []
            for other_agent_id in all_agents:
                if other_agent_id != agent_id and other_agent_id in maze_wrappers:
                    other_wrapper = maze_wrappers[other_agent_id]
                    other_history = getattr(other_wrapper, 'move_history', [])
                    waypoints = _extract_strategic_waypoints(other_wrapper, other_history)
                    teammate_strategic_waypoints.update(waypoints)
            strategic_waypoints = list(teammate_strategic_waypoints)
            debug_log(f"Teammate coordination enabled: {len(strategic_waypoints)} waypoints")
        else:
            debug_log("Teammate coordination disabled")

        # Position is captured before the LLM acts; it is what gets reported
        # as the exit position if this step ends in victory.
        agent_pos = maze_wrapper.get_agent_position()
        debug_log(f"agent_pos: {agent_pos}")

        execution_context, weights = AgentContextBuilder.build_execution_context(state, agent_id, strategic_waypoints)
        debug_log("execution_context built successfully")

        last_action = AgentContextBuilder.extract_last_action_result(agent_messages)
        debug_log(f"last_action: {last_action}")

        # gpt-5 models get reasoning options and the reasoning temperature;
        # everything else uses the plain execution temperature.
        model_name = config.get_execution_model()
        if model_name.startswith("gpt-5"):
            llm = ChatOpenAI(
                model=model_name,
                temperature=config.get_reasoning_temperature(),
                reasoning={"effort": "minimal", "summary": "detailed"},
                use_previous_response_id=True,
                verbose=True,
            )
        else:
            llm = ChatOpenAI(model=model_name, temperature=config.get_execution_temperature(), verbose=True)
        llm_with_tools = llm.bind_tools(MazeTools)
        debug_log("LLM initialized")

        # load system prompt with weights
        system_prompt = SystemMessage(content=_create_maze_system_prompt(agent_id, weights))
        debug_log("system_prompt created")

        # The prompt text keeps the exact layout (including the 16-space
        # padding) it had as an indented triple-quoted literal; spelling the
        # padding out keeps it independent of code indentation.
        pad = " " * 16
        context_message = HumanMessage(
            content=(
                "\n"
                f"{pad}\n"
                f"{pad}{execution_context}\n"
                "\n"
                f"{pad}LAST ACTION RESULT: {last_action['type']} - {last_action['data']}\n"
                f"{pad}\n"
                f"{pad}DO NOT PROVIDE REASONING. NEVER END YOUR STEP WITHOUT THE INSTRUCTED TOOL CALL - unless marked as OPTIONAL.\n"
                f"{pad}"
            )
        )
        debug_log("context_message created")

        # Include recent conversation history with proper tool call/response pairing
        try:
            recent_message_history = get_valid_message_history(agent_messages, max_count=3) or []
            debug_log(f"recent_message_history length: {len(recent_message_history)}")
        except Exception as e:
            debug_log(f"Error in get_valid_message_history: {e}")
            recent_message_history = []

        current_turn_message = [system_prompt] + recent_message_history + [context_message]
        debug_log(f"current_turn_message length: {len(current_turn_message)}")

        # get response from llm
        result = execute_step(current_turn_message, llm_with_tools)
        debug_log(f"execute_step completed, result type: {type(result)}")

        print(f"EXEC AGENT DEBUG: Agent {agent_id} reasoning: {result.content}")
        print(f"EXEC AGENT DEBUG: Agent {agent_id} tool calls: {result.tool_calls}")

        try:
            updated_openings = _update_last_known_openings(state, agent_id, agent_pos, maze_wrapper, result)
            debug_log(f"updated_openings: {updated_openings}")
        except Exception as e:
            debug_log(f"Error in _update_last_known_openings: {e}")
            updated_openings = {agent_id: []}

        # Update backtracking state if agent moved
        try:
            updated_backtrack_state = _update_backtracking_state(state, agent_id, maze_wrapper, result)
            debug_log(f"updated_backtrack_state: {updated_backtrack_state}")
        except Exception as e:
            debug_log(f"Error in _update_backtracking_state: {e}")
            updated_backtrack_state = {}

        # Append to a copy so the list held by the incoming state is untouched.
        updated_agent_messages = safe_iterate(agent_messages, "agent_messages").copy()
        updated_agent_messages.append(result)
        debug_log("agent_messages updated")

        updated_step_index = step_index + 1
        debug_log(f"updated_step_index: {updated_step_index}")

        # check if victory condition is met
        result_content = str(result.content) if result.content else ""
        if "FINISH" in result_content or "Maze Exit found" in result_content:
            debug_log("Victory condition detected")
            return set_victory(agent_id, agent_pos)

        debug_log("Building return state")
        return_state = _attach_benchmark({
            "agent_messages": {agent_id: updated_agent_messages},
            "step_index": updated_step_index,
            "known_openings": updated_openings,
            "agent_backtracking_state": updated_backtrack_state or {}
        })
        debug_log("return_state built successfully")
        return return_state

    except Exception as e:
        # Single reporting point: log once with the traceback, then let the
        # caller see the original exception.
        debug_log(f"CRITICAL ERROR in maze_execution_agent: {e}")
        print(f"Full traceback:\n{traceback.format_exc()}")
        raise

def execute_step(current_turn_message, llm_with_tools):
    """Invoke the tool-bound LLM once and return its reply.

    If the reply carries more than one tool call, only the first is kept (on
    both ``tool_calls`` and, when present, ``additional_kwargs['tool_calls']``).

    Args:
        current_turn_message: Message list sent to the model.
        llm_with_tools: Model object passed to ``safe_llm_invoke``.

    Returns:
        The model reply, or an ``AIMessage`` whose content is
        ``"Error: <exception>"`` when anything in the call or the
        post-processing raises.
    """
    try:
        reply = safe_llm_invoke(llm_with_tools, current_turn_message, logger=logger)

        # Safeguard: LLMs sometimes invoke multiple tool_calls
        requested_calls = getattr(reply, 'tool_calls', None)
        if requested_calls is not None and len(requested_calls) > 1:
            print(f"WARNING: Agent attempted {len(requested_calls)} tool calls. Using only the first one.")
            reply.tool_calls = requested_calls[:1]
            # Keep the raw provider payload consistent with the trimmed list.
            raw_kwargs = reply.additional_kwargs
            if 'tool_calls' in raw_kwargs:
                raw_kwargs['tool_calls'] = raw_kwargs['tool_calls'][:1]

        print(f"Agent thinking:| {reply.content} {reply.tool_calls}|")
    except Exception as e:
        print(f"Error getting LLM response: {e}")
        return AIMessage(content=f"Error: {e}")
    else:
        return reply

def set_victory(agent_id: str, exit_position: Tuple[int, int]) -> 'MazeState':
    """Build the partial state update that records a win.

    Args:
        agent_id: Agent credited with finding the exit.
        exit_position: Position stored as the exit location.

    Returns:
        A dict flagging the exit as found, naming the winner and its
        position, and resetting ``agent_backtracking_state`` to empty.
    """
    victory_update = dict(
        maze_exit_found=True,
        exit_position=exit_position,
        winning_agent=agent_id,
        agent_backtracking_state={},
    )
    return victory_update



########################################################################################
# Helper functions
########################################################################################

def _create_maze_system_prompt(agent_id: str, weights: Dict[str, float] = None) -> str:
    """Create system prompt with optional weight integration"""
    
    if weights is None:
        weights = {
            "exploration_weight": 1.0,
            "efficiency_weight": 1.0,
            "teammate_avoidance": 1.0,
            "backtrack_threshold": 0.7,
            "dead_end_confidence": 0.8
        }
    
    return f"""You are Agent {agent_id} in a collaborative maze escape.

CRITICAL: You MUST call exactly ONE tool per step. Only exception is the mark_dead_end tool which is OPTIONAL.

=== COORDINATE SYSTEM ===
IMPORTANT: All positions use MATRIX notation (row, col), NOT Cartesian (x, y):
- Position (3, 5) means "row 3, column 5"
- NORTH decreases row number (moves UP in visual display)
- SOUTH increases row number (moves DOWN in visual display)  
- EAST increases column number (moves RIGHT in visual display)
- WEST decreases column number (moves LEFT in visual display)
- The maze is displayed like a matrix/spreadsheet, NOT a Cartesian graph

=== WEIGHTED DECISION SYSTEM ===
Your decisions are guided by dynamic performance weights:

=== DECISION HIERARCHY (Check in this order) ===

1. BACKTRACKING LOCK MODE CHECK (Weight: OVERRIDE - Always 100% priority):
   - If context shows "🔒 BACKTRACKING LOCK MODE ACTIVE 🔒"
   - IMMEDIATELY execute the REQUIRED MOVE as specified
   - IGNORE all other guidance until lock mode is cleared
   - NO turn completion, NO alternative actions allowed

2. COORDINATE WITH TEAMMATES (Weight modifier: {weights.get('teammate_avoidance', 1.0):.1f}x):
   - ALWAYS prefer directions that lead AWAY from teammate-explored areas
   - Check "Teammate recent positions" and avoid those areas when alternatives exist
   - AVOID returning to your previous position unless no other options
   - Apply teammate_avoidance weight when calculating movement scores

3. ORCHESTRATOR & OPTIMIZATION GUIDANCE:
   - Review any specific guidance provided for strategic improvements
   - Apply optimization recommendations to improve exploration efficiency and movement success
   - Follow orchestrator corrections for dead end markings and exploration focus areas
   - Current weight adjustments: exploration={weights.get('exploration_weight', 1.0):.1f}x, efficiency={weights.get('efficiency_weight', 1.0):.1f}x

4. STANDARD BACKTRACKING MODE CHECK:
   - If context shows "BACKTRACKING ACTIVE" or "Next required move: [direction]"
   - IMMEDIATELY call move_[direction]() as specified
   - SKIP all other analysis

5. OSCILLATION DETECTION (Trigger threshold: {weights.get('backtrack_threshold', 0.7):.1f}):
   - If you're revisiting the same 2-3 positions repeatedly (check "Recent movement pattern")
   - If current position appears more than 2 times in recent_positions
   - AND you are NOT already in backtracking mode
   - IMMEDIATELY call start_backtracking() to break the cycle
   - DO NOT continue moving in circles

6. SAFETY CHECK:
   - Never move into walls
   - If all directions blocked AND not already backtracking, call start_backtracking()

7. EXPLORATION PRIORITY (Use pre-calculated movement scores):
   - Review the "WEIGHTED MOVEMENT ANALYSIS" section in context
   - Follow the RECOMMENDED direction unless overridden by higher priority rules
   - If "BACKTRACK ADVISED" is shown AND not already backtracking, call start_backtracking()
   - The system has already calculated: exploration weight ({weights.get('exploration_weight', 1.0):.1f}x), efficiency penalty ({weights.get('efficiency_weight', 1.0):.1f}x), teammate avoidance ({weights.get('teammate_avoidance', 1.0):.1f}x)
   - AVOID moving to positions marked as dead ends unless absolutely necessary for backtracking

=== AVAILABLE ACTIONS ===
- get_current_view(): Observe surroundings (shows 3x3 grid around your position)
- move_north/south/east/west(): Move one step in that direction
- mark_dead_end(): Mark current position as dead end (no coordinates needed)
- start_backtracking(): Return to nearest unexplored opening when stuck

=== DEAD END MARKING (Confidence threshold: {weights.get('dead_end_confidence', 0.8):.1f}) ===
Mark current position as dead end when ALL of these conditions are met:
1. You have only ONE possible move (e.g. ONLY 'north', ONLY 'south', ONLY 'west', ONLY 'east' is possible -> indicating corridor endpoint) AND all possible moves lead back to visited positions
2. Current unexplored directions is empty [] (no unexplored paths available)
3. You are NOT currently backtracking (avoid marking during retreat)

CONFIDENCE CALCULATION:
- Count adjacent cells you've visited
- confidence = visited_adjacent_cells / total_adjacent_open_cells
- Only mark if confidence exceeds threshold

DO NOT mark dead end if:
- Multiple unexplored directions exist
- You just arrived at this position for the first time
- Backtracking mode is active
- Confidence score < {weights.get('dead_end_confidence', 0.8):.1f}

=== TURN STRUCTURE ===
You execute 3 steps per turn:
1. get_current_view() to assess situation
2. move_[direction]() to advance exploration. Options are:
3. Optional: mark_dead_end() if criteria met AND confidence > {weights.get('dead_end_confidence', 0.8):.1f}, otherwise turn complete

=== VICTORY CONDITION ===
If you find "Maze Exit", return "FINISH" immediately.

=== FORBIDDEN ===
- Multiple tool calls per step
- Moving to same position repeatedly
- Explaining reasoning (action only)
- Using start_backtracking() when already backtracking
"""

def get_last_tool_message(messages):
    """Return the most recent ToolMessage in *messages*, or None if there is none.

    The sequence is walked from its end, so the first match found is the
    latest tool response in the conversation.
    """
    newest_first = reversed(messages)
    return next((item for item in newest_first if isinstance(item, ToolMessage)), None)

def extract_orchestrator_guidance_for_agent(state: MazeState, agent_id: str) -> str:
    """Look up the orchestrator's guidance addressed to one agent.

    Args:
        state: Shared state; its "orchestrator_guidance" entry is read.
        agent_id: Key of the agent whose guidance is wanted.

    Returns:
        The entry stored under ``agent_id`` when the guidance is a dict
        ("" if the agent has no entry). If the guidance is some other truthy
        value it is returned as ``str(value)``; falsy values yield "".
    """
    guidance = state.get("orchestrator_guidance", {})

    if not isinstance(guidance, dict):
        # Unexpected shape (e.g. a single string meant for everyone):
        # hand it back as text rather than failing.
        if guidance:
            return str(guidance)
        return ""

    # Expected shape: one entry per agent id.
    return guidance.get(agent_id, "")

def get_valid_message_history(messages, max_count=10):
    """Select the slice of conversation history to send with the next model call.

    Reasoning models (execution model name starting with "gpt-5", "o1" or
    "o3") are treated conservatively: once any AIMessage carries a
    ``reasoning`` entry in ``additional_kwargs`` the history is either returned
    whole (when it holds at most ``max_count // 2`` messages) or dropped
    entirely, never cut in the middle. Every other model gets orphaned
    ToolMessages stripped and the history trimmed to about ``max_count``
    messages.

    Args:
        messages: Conversation messages, oldest first; may be None.
        max_count: Upper bound on the number of messages kept.

    Returns:
        A list of messages (possibly the input object itself). An empty list
        is returned when ``messages`` is None or when any error occurs.
    """
    def carries_reasoning(candidate):
        # True for AI messages that have a reasoning item attached.
        return (
            isinstance(candidate, AIMessage)
            and hasattr(candidate, 'additional_kwargs')
            and candidate.additional_kwargs.get('reasoning') is not None
        )

    try:
        debug_log(f"get_valid_message_history called with messages type: {type(messages)}, count: {len(messages) if messages else 'None'}", 580)

        if messages is None:
            debug_log("messages is None, returning empty list", 583)
            return []

        model_name = config.get_execution_model()

        if model_name.startswith(("gpt-5", "o1", "o3")):
            # --- reasoning-model path -------------------------------------
            if not any(carries_reasoning(entry) for entry in messages):
                # Nothing to protect yet: a plain tail slice is safe.
                if len(messages) > max_count:
                    cleaned_messages = messages[-max_count:]
                else:
                    cleaned_messages = messages
                debug_log(f"Reasoning model without reasoning items: returning {len(cleaned_messages)} messages", 588)
                return cleaned_messages

            # Reasoning items present: keep everything or nothing, using half
            # the limit as the safety margin.
            if len(messages) <= max_count // 2:
                debug_log(f"Reasoning model with reasoning items: returning all {len(messages)} messages (under safety limit)", 588)
                debug_log(f"cleaned_messages reasoning content: {messages if messages else 'None'}", 588)
                return messages

            debug_log(f"Reasoning model with too many reasoning items ({len(messages)}): starting fresh conversation", 588)
            debug_log(f"cleaned_messages reasoning content: {messages if messages else 'None'}", 588)
            return []

        # --- non-reasoning path -------------------------------------------
        cleaned_messages = _remove_orphaned_tool_messages(messages)
        debug_log(f"cleaned_messages length: {len(cleaned_messages) if cleaned_messages else 'None'}", 588)

        if len(cleaned_messages) <= max_count:
            debug_log("Returning all cleaned messages", 591)
            return cleaned_messages

        try:
            # Each message counts as one "token", so max_tokens is a message
            # budget. Note the trim runs on the original `messages`; orphans
            # are stripped again from its output below.
            trimmed = trim_messages(
                messages,
                strategy="last",
                token_counter=len,
                max_tokens=max_count,
                include_system=False,
                allow_partial=False,
            )
            debug_log(f"trimmed messages length: {len(trimmed) if trimmed else 'None'}", 603)
        except Exception as e:
            debug_log(f"Error in trim_messages: {e}", 605)
            # The cleaned list is known to be longer than max_count here.
            trimmed = cleaned_messages[-max_count:]

        result = _remove_orphaned_tool_messages(trimmed)
        debug_log(f"final result length: {len(result) if result else 'None'}", 609)
        return result

    except Exception as e:
        debug_log(f"ERROR in get_valid_message_history: {e}", 613)
        print(f"Full traceback:\n{traceback.format_exc()}")
        return []

def _update_last_known_openings(state: MazeState, agent_id: str, current_pos: Tuple[int, int], maze_wrapper, llm_result) -> Dict[str, List[str]]:
    """Refresh one agent's list of openings and return it keyed by agent id.

    Openings are strings of the form ``"{position}-{direction}-UNEXPLORED"``.
    The update runs in four steps:

    1. At a junction (two or more possible moves) every direction whose target
       cell is not in ``maze_wrapper.move_history`` is recorded as UNEXPLORED.
       All such directions are recorded, not only the first one found, so a
       multi-way junction keeps each of its untried branches.
    2. The direction just taken (parsed from ``llm_result``) is removed.
    3. Entries tagged ``-EXPLORED`` are stripped.
    4. The remainder is sorted by (priority, Manhattan distance to
       ``current_pos``, whether the opening leads into a cell a teammate has
       visited).

    Args:
        state: Shared state; "known_openings", "all_agents" and
            "maze_wrappers" are read.
        agent_id: Agent whose openings are updated.
        current_pos: The agent's position as (row, col).
        maze_wrapper: The agent's maze wrapper (``get_possible_moves()`` and
            ``move_history`` are used).
        llm_result: Model response used to detect the direction taken.

    Returns:
        ``{agent_id: openings}``; ``{agent_id: []}`` if an unexpected error
        occurs.
    """
    # Local import: parse stored "(row, col)" strings without evaluating code.
    import ast

    try:
        debug_log(f"_update_last_known_openings called for agent {agent_id} at position {current_pos}", 621)

        # Get the dictionary of known openings, and the list for this agent
        known_openings = safe_get(state, "known_openings", {}, "state")
        debug_log(f"known_openings type: {type(known_openings)}, value: {known_openings}", 625)

        agent_openings = safe_get(known_openings, agent_id, [], "known_openings") if known_openings else []
        debug_log(f"agent_openings type: {type(agent_openings)}, value: {agent_openings}", 629)

        if agent_openings is None:
            debug_log("agent_openings was None, setting to empty list", 632)
            agent_openings = []

        # Work with a copy to avoid modifying the original
        openings = safe_iterate(agent_openings, "agent_openings").copy()
        debug_log(f"openings copy: {openings}", 637)

        # Step 1: add new unexplored directions
        try:
            possible_moves = maze_wrapper.get_possible_moves()
            debug_log(f"possible_moves: {possible_moves}", 642)
        except Exception as e:
            debug_log(f"Error getting possible_moves: {e}", 644)
            possible_moves = []

        if len(possible_moves) >= 2:
            # Collect EVERY direction that leads to an unvisited cell. Stopping
            # at the first hit would misfile the other unexplored branches as
            # explored, and they would be discarded in step 3.
            unexplored_directions = []
            for direction in safe_iterate(possible_moves, "possible_moves"):
                try:
                    next_pos = _get_next_position(current_pos, direction)
                    if next_pos and next_pos not in maze_wrapper.move_history:
                        unexplored_directions.append(direction)
                except Exception as e:
                    debug_log(f"Error checking direction {direction}: {e}", 655)

            if unexplored_directions:
                debug_log(f"Agent {agent_id} at junction with {len(possible_moves)} directions: {possible_moves} (has unexplored paths)", 658)

                # Unexplored directions go to the front of the list
                for direction in unexplored_directions:
                    try:
                        opening = f"{current_pos}-{direction}-UNEXPLORED"
                        if opening not in openings:
                            openings.insert(0, opening)
                            debug_log(f"Agent {agent_id} discovered HIGH PRIORITY unexplored opening: {opening}", 664)
                    except Exception as e:
                        debug_log(f"Error processing unexplored direction {direction}: {e}", 666)

                # Already-visited directions are appended as lower priority
                # (they are tagged EXPLORED and stripped again in step 3)
                for direction in safe_iterate(possible_moves, "possible_moves_iteration"):
                    if direction not in unexplored_directions:
                        try:
                            opening = f"{current_pos}-{direction}-EXPLORED"
                            if opening not in openings:
                                openings.append(opening)
                                debug_log(f"Agent {agent_id} discovered lower priority opening: {opening}", 670)
                        except Exception as e:
                            debug_log(f"Error processing explored direction {direction}: {e}", 672)
            else:
                debug_log(f"Agent {agent_id} at junction with {len(possible_moves)} directions: {possible_moves} (all paths already explored) - no high priority openings added", 674)
        else:
            debug_log(f"Agent {agent_id} at corridor/dead-end with {len(possible_moves)} directions: {possible_moves} - no openings added", 676)

        # Step 2: remove the direction we just took
        try:
            direction_taken = _extract_direction_from_result(llm_result)
            debug_log(f"direction_taken: {direction_taken}", 675)
            if direction_taken and direction_taken.strip():
                # Remove the untagged, UNEXPLORED and EXPLORED spellings
                explored_opening = f"{current_pos}-{direction_taken}"
                unexplored_opening = f"{current_pos}-{direction_taken}-UNEXPLORED"
                explored_opening_tagged = f"{current_pos}-{direction_taken}-EXPLORED"

                for opening_to_remove in [explored_opening, unexplored_opening, explored_opening_tagged]:
                    if opening_to_remove in openings:
                        openings.remove(opening_to_remove)
                        debug_log(f"Agent {agent_id} explored opening: {opening_to_remove}", 680)
        except Exception as e:
            debug_log(f"Error extracting/removing direction: {e}", 682)

        # Step 3: clean up EXPLORED entries to avoid agent confusion
        try:
            openings = [opening for opening in openings if "-EXPLORED" not in opening]
            debug_log(f"Cleaned EXPLORED entries, remaining openings: {len(openings)}", 703)
        except Exception as e:
            debug_log(f"Error cleaning EXPLORED entries: {e}", 705)

        # Step 4: sort, unexplored first, then by distance to current position
        try:
            # Cells visited by any teammate, used to de-prioritise overlap
            all_agents = safe_get(state, "all_agents", [], "state")
            teammate_positions = set()
            for other_agent_id in all_agents:
                if other_agent_id != agent_id and other_agent_id in state.get("maze_wrappers", {}):
                    other_wrapper = state["maze_wrappers"][other_agent_id]
                    other_history = getattr(other_wrapper, 'move_history', [])
                    teammate_positions.update(other_history)

            def opening_priority(opening: str) -> Tuple[int, float, int]:
                """Return (priority_level, distance, teammate_proximity) for sorting"""
                try:
                    priority = 0 if "UNEXPLORED" in opening else 1

                    # Defaults for entries whose position cannot be parsed
                    distance = 999
                    teammate_proximity = 0

                    parts = opening.split("-")
                    if len(parts) >= 2:
                        pos_str = parts[0]
                        if pos_str.startswith("(") and pos_str.endswith(")"):
                            # literal_eval accepts only Python literals, so a
                            # malformed entry cannot execute code; it raises
                            # and the entry falls to the lowest priority.
                            pos_coords = ast.literal_eval(pos_str)
                            distance = abs(pos_coords[0] - current_pos[0]) + abs(pos_coords[1] - current_pos[1])

                            # Lower priority if the opening leads into a cell
                            # a teammate has already visited
                            next_pos = _get_next_position(pos_coords, parts[1])
                            if next_pos and next_pos in teammate_positions:
                                teammate_proximity = 1

                    return (priority, distance, teammate_proximity)
                except Exception:
                    return (2, 999, 1)  # Error case gets lowest priority

            openings.sort(key=opening_priority)
            debug_log(f"Sorted openings by priority (considering teammates): {openings[:5]}...", 709)
        except Exception as e:
            debug_log(f"Error sorting openings: {e}", 711)

        result = {agent_id: openings}
        debug_log(f"_update_last_known_openings result: {result}", 685)
        return result

    except Exception as e:
        debug_log(f"ERROR in _update_last_known_openings: {e}", 689)
        print(f"Full traceback:\n{traceback.format_exc()}")
        return {agent_id: []}

def _extract_direction_from_result(llm_result) -> str:
    """Find which compass direction the model moved in, if any.

    Tool calls are inspected first: each is rendered with ``str()`` and
    searched for ``move_<direction>``. If none matches, the message content is
    searched the same way. Directions are tried in the order north, south,
    east, west.

    Returns:
        The matching direction name, or "" when nothing matches or an error
        occurs.
    """
    compass = ("north", "south", "east", "west")

    try:
        debug_log(f"_extract_direction_from_result called with result type: {type(llm_result)}", 697)

        has_tool_calls = (
            hasattr(llm_result, 'tool_calls')
            and llm_result.tool_calls is not None
            and len(llm_result.tool_calls) > 0
        )

        if has_tool_calls:
            try:
                debug_log(f"Processing {len(llm_result.tool_calls)} tool calls", 704)
                for index, call in enumerate(safe_iterate(llm_result.tool_calls, "tool_calls")):
                    debug_log(f"Processing tool call {index}: {call}", 706)
                    rendered = str(call).lower()
                    hit = next((name for name in compass if f"move_{name}" in rendered), None)
                    if hit is not None:
                        debug_log(f"Found direction: {hit}", 710)
                        return hit
            except (TypeError, AttributeError) as e:
                debug_log(f"Warning: Error processing tool_calls: {e}", 713)

        # No tool call matched: fall back to the text of the message
        try:
            content = ""
            if llm_result.content:
                content = str(llm_result.content).lower()
            debug_log(f"Checking content for direction: {content[:100]}...", 718)
            hit = next((name for name in compass if f"move_{name}" in content), None)
            if hit is not None:
                debug_log(f"Found direction in content: {hit}", 721)
                return hit
        except (TypeError, AttributeError) as e:
            debug_log(f"Warning: Error processing content: {e}", 724)

        debug_log("No direction found", 726)
        return ""

    except Exception as e:
        debug_log(f"ERROR in _extract_direction_from_result: {e}", 730)
        print(f"Full traceback:\n{traceback.format_exc()}")
        return ""


def _get_next_position(current_pos: Tuple[int, int], direction: str) -> Tuple[int, int]:
    """Return the (row, col) cell one step from *current_pos* in *direction*.

    Coordinates follow array indexing: north/south change the row
    (north = row - 1), east/west change the column (east = col + 1).

    Returns:
        The neighbouring (row, col) tuple, or None when *direction* is not one
        of 'north', 'south', 'east', 'west'.
    """
    row, col = current_pos

    step = {
        'north': (-1, 0),  # one row up
        'south': (1, 0),   # one row down
        'east': (0, 1),    # one column right
        'west': (0, -1),   # one column left
    }.get(direction)

    if step is None:
        return None

    return (row + step[0], col + step[1])

def _remove_orphaned_tool_messages(messages):
    """Drop ToolMessages that do not answer a tool call of the latest AIMessage.

    A ToolMessage is kept only when its ``tool_call_id`` matches a still
    unanswered call of the most recent AIMessage, with nothing but other tool
    responses in between. This keeps every response to an AIMessage that
    issued several tool calls at once, not just the first one. Repeated
    responses to the same call id are dropped.

    Args:
        messages: Conversation messages, oldest first; may be None.

    Returns:
        A new list with orphaned ToolMessages removed. An empty list is
        returned when ``messages`` is None or when any error occurs.
    """
    try:
        debug_log(f"_remove_orphaned_tool_messages called with {len(messages) if messages else 'None'} messages", 738)

        if messages is None:
            debug_log("messages is None, returning empty list", 741)
            return []

        cleaned = []

        # Call ids issued by the latest AIMessage that still await a response.
        # None means the latest non-tool message was not an AIMessage.
        pending_call_ids = None

        for i, msg in enumerate(safe_iterate(messages, "messages")):
            debug_log(f"Processing message {i}: {type(msg)}", 747)

            if not isinstance(msg, ToolMessage):
                # Keep all non-ToolMessages
                cleaned.append(msg)
                debug_log(f"Keeping non-ToolMessage {i}", 764)

                if isinstance(msg, AIMessage):
                    tool_calls = getattr(msg, 'tool_calls', None)
                    if tool_calls is None:
                        pending_call_ids = []
                    else:
                        pending_call_ids = [
                            tc.get('id')
                            for tc in safe_iterate(tool_calls, "prev_ai.tool_calls")
                            if isinstance(tc, dict)
                        ]
                else:
                    # Any other speaker ends the window for tool responses
                    pending_call_ids = None
                continue

            if pending_call_ids is None:
                debug_log(f"Removing orphaned ToolMessage {i}: {str(msg.content)[:80]}...", 760)
            elif msg.tool_call_id in pending_call_ids:
                # Valid tool call/response pair; mark the call as answered
                pending_call_ids.remove(msg.tool_call_id)
                cleaned.append(msg)
                debug_log(f"Keeping valid ToolMessage {i}", 756)
            else:
                debug_log(f"Removing orphaned ToolMessage {i}: {str(msg.content)[:80]}...", 758)

        debug_log(f"_remove_orphaned_tool_messages result: {len(cleaned)} messages", 766)
        return cleaned

    except Exception as e:
        debug_log(f"ERROR in _remove_orphaned_tool_messages: {e}", 770)
        print(f"Full traceback:\n{traceback.format_exc()}")
        return []

def _update_backtracking_state(state: MazeState, agent_id: str, maze_wrapper, llm_result) -> Dict[str, Dict[str, Any]]:
    """Advance an agent's backtracking progress after its latest move.

    When the agent is backtracking and ``llm_result`` contains a move, its
    ``current_step`` is incremented. If the agent now stands on its
    ``target_position``, both ``is_backtracking`` and ``lock_mode`` are
    switched off. The agent's entry is modified in place.

    Args:
        state: Shared state; "agent_backtracking_state" is read.
        agent_id: Agent whose progress is updated.
        maze_wrapper: The agent's maze wrapper (``get_agent_position()`` is
            used).
        llm_result: Model response checked for a move tool call.

    Returns:
        The full backtracking mapping for all agents, or ``{}`` when there is
        none. On error the mapping currently in ``state`` is returned.
    """
    try:
        debug_log(f"_update_backtracking_state called for agent {agent_id}")

        # Read from the post-tool-execution state: tools such as
        # start_backtracking may have just modified it.
        all_backtracking = safe_get(state, "agent_backtracking_state", {}, "state")

        if not all_backtracking:
            debug_log("No backtracking state found for any agent")
            return {}

        if agent_id not in all_backtracking:
            debug_log(f"Agent {agent_id} not in backtracking state, preserving existing state")
            # Hand back everything so other agents' progress is preserved
            return all_backtracking

        entry = all_backtracking[agent_id]
        if not entry.get('is_backtracking', False):
            debug_log(f"Agent {agent_id} not currently backtracking")
            return all_backtracking

        if _is_successful_move(llm_result):
            # One more step along the planned path
            entry['current_step'] = entry.get('current_step', 0) + 1

            position_now = maze_wrapper.get_agent_position()
            goal = entry.get('target_position')

            if position_now == goal:
                # Target reached: leave backtracking and release the lock
                entry['is_backtracking'] = False
                entry['lock_mode'] = False
                debug_log(f"Agent {agent_id} reached backtracking target {goal}, lock mode disabled")

        all_backtracking[agent_id] = entry
        return all_backtracking

    except Exception as e:
        debug_log(f"ERROR in _update_backtracking_state: {e}")
        # On error, try to preserve whatever is currently stored
        return safe_get(state, "agent_backtracking_state", {}, "state") or {}

def _is_successful_move(llm_result) -> bool:
    """Report whether the model response contains a movement tool call.

    A tool call counts as a move when its lower-cased ``str()`` rendering
    contains ``move_``. Any error while inspecting the result yields False.
    """
    try:
        calls = getattr(llm_result, 'tool_calls', None)
        if not calls:
            return False
        return any('move_' in str(call).lower() for call in calls)
    except Exception:
        return False
    
def _extract_strategic_waypoints(maze_wrapper, move_history) -> set:
    """Extract strategic waypoints from a teammate's movement history.

    The result is the union of:
    - positions listed in ``maze_wrapper.marked_dead_ends`` (if present),
    - turning points: interior positions of the history where the step into
      the position differs from the step out of it,
    - positions that occur at least twice in the history (revisits).

    Visit counts are computed once with a Counter rather than by calling
    ``list.count`` for every step, so the scan stays linear in the length of
    the history.

    Args:
        maze_wrapper: Wrapper whose optional ``marked_dead_ends`` is read.
        move_history: Sequence of visited positions, oldest first.

    Returns:
        A set of positions. It is empty when the history has fewer than two
        entries.
    """
    from collections import Counter  # local import keeps the block self-contained

    waypoints = set()

    if not move_history or len(move_history) < 2:
        return waypoints

    try:
        # Dead ends marked by this teammate
        dead_ends = getattr(maze_wrapper, 'marked_dead_ends', [])
        waypoints.update(dead_ends)

        history = list(move_history)

        # Revisited positions (an indicator of backtracking)
        visit_counts = Counter(history)
        waypoints.update(pos for pos, visits in visit_counts.items() if visits >= 2)

        # Turning points: walk (previous, current, next) triples so only
        # interior positions are examined
        for prev_pos, position, next_pos in zip(history, history[1:], history[2:]):
            try:
                if prev_pos and next_pos:
                    dir_in = (position[0] - prev_pos[0], position[1] - prev_pos[1])
                    dir_out = (next_pos[0] - position[0], next_pos[1] - position[1])

                    # A change of heading suggests a junction decision
                    if dir_in != dir_out:
                        waypoints.add(position)
            except Exception as e:
                debug_log(f"Error processing waypoint {position}: {e}", 0)
                continue

    except Exception as e:
        debug_log(f"Error extracting strategic waypoints: {e}", 0)

    return waypoints


def _calculate_movement_scores(current_position: Tuple[int, int], possible_moves: List[str], 
                              visited_positions: List[Tuple[int, int]], recent_positions: List[Tuple[int, int]],
                              teammate_positions: List[Tuple[int, int]], weights: Dict[str, float],
                              marked_dead_ends: List[Tuple[int, int]]) -> Dict[str, float]:
    """Calculate a weighted score for each possible direction.

    Every direction starts at 1.0 and is then multiplied by:
    - ``weights['exploration_weight']`` if the target cell is unvisited,
    - ``1 / weights['efficiency_weight']`` if it was visited recently,
    - ``1 / weights['teammate_avoidance']`` if a teammate has been there and
      the "enable_teammate_coordination" ablation flag is on (default True),
    - 0.01 if it is a marked dead end (heavily penalised, not forbidden).
    Missing weights default to 1.0.

    Args:
        current_position: The agent's (row, col) position.
        possible_moves: Direction names to score; empty or None yields ``{}``.
        visited_positions: Cells the agent has visited.
        recent_positions: Cells visited recently.
        teammate_positions: Cells visited by teammates.
        weights: Multipliers keyed by weight name.
        marked_dead_ends: Cells marked as dead ends.

    Returns:
        Mapping of direction to score. Directions that do not resolve to a
        position are omitted.
    """
    scores = {}

    if not possible_moves:
        return scores

    # The ablation flag does not change between directions, so read it once.
    # A missing "ablations" section is treated as "coordination enabled".
    ablation_config = config.get('ablations') or {}
    coordination_enabled = ablation_config.get("enable_teammate_coordination", True)

    for direction in possible_moves:
        next_pos = _get_next_position(current_position, direction)
        if not next_pos:
            continue

        # Base score
        score = 1.0

        # Unexplored cells get the exploration bonus
        if next_pos not in visited_positions:
            score *= weights.get('exploration_weight', 1.0)

        # Recently visited cells are penalised
        if next_pos in recent_positions:
            score *= (1.0 / weights.get('efficiency_weight', 1.0))

        # Teammate-visited cells are penalised only when coordination is on
        if coordination_enabled and next_pos in teammate_positions:
            score *= (1.0 / weights.get('teammate_avoidance', 1.0))

        # Heavy penalty but not an absolute block, so the agent can still
        # escape from a cell that was wrongly marked as a dead end
        if next_pos in marked_dead_ends:
            score *= 0.01

        scores[direction] = score

    return scores

def _calculate_dead_end_confidence(current_position: Tuple[int, int], possible_moves: List[str],
                                 visited_positions: List[Tuple[int, int]]) -> float:
    """Calculate the confidence that the current position is a dead end.

    Confidence is the share of OPEN neighbouring cells (those reachable via
    ``possible_moves``) that have already been visited. Dividing by all four
    neighbours would count walls in the denominator, capping the result at
    0.25 for a corridor endpoint, so it could never reach a threshold such as
    0.8.

    Args:
        current_position: The agent's (row, col) position.
        possible_moves: Directions that can be taken from here.
        visited_positions: Cells the agent has visited.

    Returns:
        0.0 unless exactly one move is possible; otherwise visited open
        neighbours divided by open neighbours (0.0 if the direction is not a
        known compass name).
    """
    if len(possible_moves) != 1:  # Not a corridor endpoint
        return 0.0

    row, col = current_position

    # (row, col) offsets using array coordinates: north is row - 1
    offsets = {
        'north': (-1, 0),
        'south': (1, 0),
        'east': (0, 1),
        'west': (0, -1),
    }

    open_neighbors = [
        (row + offsets[direction][0], col + offsets[direction][1])
        for direction in possible_moves
        if direction in offsets
    ]
    if not open_neighbors:
        return 0.0

    visited_open = sum(1 for pos in open_neighbors if pos in visited_positions)
    return visited_open / len(open_neighbors)

def _calculate_unexplored_directions(current_pos: Tuple[int, int], 
                                   possible_moves: List[str], 
                                   visited_positions: List[Tuple[int, int]]) -> List[str]:
    """List the possible moves whose target cell has not been visited yet.

    Directions that do not resolve to a position are left out. The order of
    ``possible_moves`` is preserved.
    """
    targets = (
        (heading, _get_next_position(current_pos, heading))
        for heading in possible_moves
    )
    return [
        heading
        for heading, cell in targets
        if cell and cell not in visited_positions
    ]

def _generate_dynamic_behavioral_prompts(state: MazeState, agent_id: str) -> tuple[Dict[str, str], Dict[str, float]]:
    """Build behavioural prompt snippets and weights from free-energy metrics.

    The newest "entropy_history" entry supplies the agent's free-energy data,
    and the last five entries supply its ``total_fe`` history. Weights are
    generated from these. Prompts are generated too, unless the
    "enable_dynamic_weights" ablation flag is off, in which case static
    weights and no prompts are returned.

    Args:
        state: Shared state; "entropy_history" is read.
        agent_id: Agent the prompts and weights are generated for.

    Returns:
        ``(dynamic_prompts, weights)``. When there is no history, no data for
        this agent, dynamic weights are disabled, or an error occurs, the
        result is ``({}, static default weights)``.
    """
    def static_weights() -> Dict[str, float]:
        # A fresh dict per call so no two results share one object
        return {
            "exploration_weight": 1.0,
            "efficiency_weight": 1.0,
            "teammate_avoidance": 1.0,
            "backtrack_threshold": 0.7,
            "dead_end_confidence": 0.8
        }

    try:
        debug_log(f"_generate_dynamic_behavioral_prompts called for agent {agent_id}")

        # Latest free energy data comes from the entropy history
        entropy_history = safe_get(state, "entropy_history", [], "state")
        if not entropy_history:
            debug_log("No entropy history available, returning empty prompts and default weights")
            return {}, static_weights()

        newest = entropy_history[-1]
        agent_fe_scores = safe_get(newest, "agent_fe_scores", {}, "latest_entry")

        if agent_id not in agent_fe_scores:
            debug_log(f"No FE data for agent {agent_id}, returning empty prompts and default weights")
            return {}, static_weights()

        fe_data = agent_fe_scores[agent_id]
        debug_log(f"FE data for {agent_id}: {fe_data}")

        # total_fe of this agent over the last 5 entries that mention it
        recent_scores = (
            safe_get(entry, "agent_fe_scores", {}, "entry")
            for entry in entropy_history[-5:]
        )
        fe_history = [
            safe_get(scores[agent_id], "total_fe", 1.0, "agent_scores")
            for scores in recent_scores
            if agent_id in scores
        ]
        agent_state = {"fe_history": fe_history}

        # Weights are generated first, regardless of the ablation flag
        weights = generate_dynamic_execution_weights(fe_data, agent_state)
        debug_log(f"Generated weights for {agent_id}: {weights}")

        ablation_config = config.get('ablations')
        if not ablation_config.get("enable_dynamic_weights", True):
            # Ablation: discard the generated weights in favour of the defaults
            debug_log("Dynamic weights disabled, using static defaults", 172)
            return {}, static_weights()

        dynamic_prompts = generate_dynamic_execution_prompts(fe_data, agent_state)
        debug_log(f"Generated dynamic prompts: {dynamic_prompts}")
        return dynamic_prompts, weights

    except Exception as e:
        debug_log(f"ERROR in _generate_dynamic_behavioral_prompts: {e}")
        print(f"Full traceback:\n{traceback.format_exc()}")
        return {}, static_weights()


def _get_backtracking_lock_guidance(agent_backtrack: Dict[str, Any], current_position: Tuple[int, int]) -> str:
    """Build the prompt text that forces an agent along its backtracking path.

    Args:
        agent_backtrack: The agent's backtracking entry; the keys
            'is_backtracking', 'lock_mode', 'target_position', 'path' and
            'current_step' are read.
        current_position: The agent's current position.

    Returns:
        "" when the agent is not backtracking, lock mode is off, or an error
        occurs. Otherwise one of: a "target reached" notice, the lock-mode
        instructions naming the required move, or a "path calculation error"
        notice when no further step is left in the path.
    """
    try:
        lock_active = (
            agent_backtrack
            and agent_backtrack.get('is_backtracking', False)
            and agent_backtrack.get('lock_mode', False)
        )
        if not lock_active:
            return ""

        target_pos = agent_backtrack.get('target_position')
        path = agent_backtrack.get('path', [])
        current_step = agent_backtrack.get('current_step', 0)

        # Already standing on the target
        if current_position == target_pos:
            return "\n            \nBACKTRACKING STATUS: TARGET REACHED! Lock mode disabled. Resume normal exploration.\n            "

        # No further step left in the planned path
        if not (current_step < len(path) - 1):
            return "\n        \nBACKTRACKING STATUS: Path calculation error. Resume normal exploration.\n        "

        # Imported here rather than at module level
        from orchestrator_maze_implementation.tools.basic_tool_functions import _get_next_backtrack_move
        direction = _get_next_backtrack_move(current_position, path).lower()
        remaining_steps = len(path) - current_step - 1

        move_call = f"move_{direction}()"
        preview = ' → '.join(map(str, path[current_step:current_step+3]))
        more_marker = '...' if len(path) > current_step + 3 else ''

        lines = [
            "",
            "            ",
            "🔒 BACKTRACKING LOCK MODE ACTIVE 🔒",
            "CRITICAL: Agent is in MANDATORY backtracking mode.",
            f"→ Target: {target_pos} ({remaining_steps} steps remaining)",
            f"→ REQUIRED MOVE: {move_call}",
            f"→ Path: {preview}{more_marker}",
            "",
            "⚠️  LOCK MODE RESTRICTIONS:",
            f"- MUST call {move_call} - NO alternatives",
            "- NO turn completion allowed until target reached",
            "- NO other tool calls permitted (except required movement)",
            "- IGNORE all other guidance while in lock mode",
            "",
            f"EXECUTE: {move_call} IMMEDIATELY",
            "            ",
        ]
        return "\n".join(lines)

    except Exception as e:
        debug_log(f"Error in _get_backtracking_lock_guidance: {e}")
        return ""

def _format_dynamic_modifiers(dynamic_prompts: Dict[str, str]) -> str:
    """Format dynamic prompts as concise modifiers"""
    if not dynamic_prompts:
        return ""
    
    modifiers = []
    for key, value in dynamic_prompts.items():
        if value.strip():
            modifiers.append(f"- {value.strip()}")
    
    return "\nPERFORMANCE MODIFIERS:\n" + "\n".join(modifiers) if modifiers else ""

def _get_reasoning_safe_history(messages, max_count=10):
    """Trim message history without cutting through a reasoning chain.

    Args:
        messages: Conversation messages, oldest first. May be empty or ``None``.
        max_count: Target number of messages to keep.

    Returns:
        * ``messages`` unchanged when it is empty/``None`` or has at most
          ``max_count`` entries.
        * The last ``max_count`` messages when no ``HumanMessage`` or
          ``SystemMessage`` is present.
        * Everything from the last ``HumanMessage``/``SystemMessage`` onward
          when that tail has at most ``max_count`` entries (the result may
          then be shorter than ``max_count``).
        * Otherwise, a tail built by walking backwards from the newest message
          until at least ``max_count`` messages are kept and the walk is at a
          safe breakpoint (see the loop below). This can return more than
          ``max_count`` messages.
    """
    # Guarded length: the debug line must not raise when messages is None.
    message_count = len(messages) if messages else 0
    debug_log(f"_get_reasoning_safe_history called with {message_count} messages")

    if not messages or message_count <= max_count:
        return messages

    # Original author's note, attributed to the OpenAI docs: "you must include
    # all reasoning items between the function call and the last user message",
    # so reasoning chains must not be broken arbitrarily.

    # Find the last user message (HumanMessage or SystemMessage)
    last_user_idx = next(
        (
            idx
            for idx in range(message_count - 1, -1, -1)
            if isinstance(messages[idx], (HumanMessage, SystemMessage))
        ),
        -1,
    )

    if last_user_idx == -1:
        # No user message found; more than max_count messages exist here,
        # so simply keep the most recent ones.
        return messages[-max_count:]

    # Include all messages from the last user message onward so the current
    # reasoning chain stays intact.
    messages_from_last_user = messages[last_user_idx:]
    if len(messages_from_last_user) <= max_count:
        return messages_from_last_user

    # Too many messages even from the last user message: walk backwards from
    # the end and stop at the first safe breakpoint once enough are kept.
    # NOTE(review): this walk can stop before reaching the last user message,
    # so the kept tail may begin mid-chain — confirm this is intended.
    reasoning_chain_active = False
    cut = 0
    for cut in range(message_count - 1, -1, -1):
        msg = messages[cut]
        kept = message_count - cut
        closes_chain = False

        if isinstance(msg, AIMessage):
            extra = getattr(msg, 'additional_kwargs', None) or {}
            if extra.get('reasoning') is not None:
                reasoning_chain_active = True
            else:
                # An AI message without reasoning, seen after reasoning
                # messages, is treated as a safe place to stop.
                closes_chain = reasoning_chain_active
        elif isinstance(msg, (HumanMessage, SystemMessage)):
            # User messages are always safe breakpoints
            reasoning_chain_active = False
        # ToolMessage (and any other type) leaves the chain state unchanged.

        if kept >= max_count and (closes_chain or not reasoning_chain_active):
            break

    # One slice instead of repeated insert(0, ...) while walking backwards.
    safe_messages = list(messages[cut:])

    debug_log(f"_get_reasoning_safe_history returning {len(safe_messages)} messages (reasoning-safe)")
    return safe_messages