import atexit
import os
import pickle
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import datetime
from datetime import datetime, date, time, timedelta
from typing import Dict, List, Any, Literal, Tuple, Union, Optional
import copy
import spacy
from spacy.matcher import PhraseMatcher
from openai import OpenAI
from dateparser.search import search_dates
import traceback
import gradio as gr
import plotly.graph_objects as go
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
from pyngrok import ngrok
import logging
from collections import deque
import re
import json
import dateparser
from ablation_controller import AblationMode
# Configure logging once at import time: INFO level, "<timestamp> - <level> - <message>" lines.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Tigramite (PCMCI causal discovery) is intentionally not imported right now.
# Every tigramite symbol is bound to None and `pcmci_available` is False.
# The `except` branch supplies inert stand-in classes instead, should the
# placeholder setup itself ever fail.
try:
    print("Skipping tigramite imports for testing...")
    pp = PCMCI = ParCorr = GPDC = CMIknn = None
    pcmci_available = False
except Exception as e:
    print("Warning: Tigramite library not found or failed to import correctly.")
    print("Exception:", e)
    pp = None  # Keep the name defined so later references do not raise NameError

    class PCMCI:
        """Inert stand-in whose run_pcmci() reports an empty graph."""

        def __init__(self, *args, **kwargs):
            pass

        def run_pcmci(self, *args, **kwargs):
            return {'graph': []}

    class ParCorr:
        """Inert stand-in that accepts and ignores any constructor arguments."""

        def __init__(self, *args, **kwargs):
            pass

    class GPDC:
        """Inert stand-in that accepts and ignores any constructor arguments."""

        def __init__(self, *args, **kwargs):
            pass

    class CMIknn:
        """Inert stand-in that accepts and ignores any constructor arguments."""

        def __init__(self, *args, **kwargs):
            pass

    pcmci_available = False
class PlotSessionManager:
    """Holds the currently active plot together with a history of every plot that was set."""

    def __init__(self):
        self.plot_history = []
        self.current_plot = None
        self.session_active = True

    def set_plot(self, plot_fig, metadata=None):
        """Make *plot_fig* the active plot, record it in the history and return its entry."""
        entry = {
            'figure': plot_fig,
            'metadata': metadata if metadata else {},
            'timestamp': datetime.now()
        }
        self.current_plot = entry
        self.plot_history.append(entry)
        return entry

    def get_current_plot(self):
        """Return the active figure, or None when no plot is active."""
        if not self.current_plot:
            return None
        return self.current_plot['figure']

    def clear_plot(self):
        """Drop the active plot; the history is left untouched."""
        self.current_plot = None

    def clear_session(self):
        """Drop the active plot and start a fresh, empty history."""
        self.current_plot, self.plot_history = None, []

    def has_active_plot(self):
        """Tell whether a plot is currently active."""
        return self.current_plot is not None


# Module-level PlotSessionManager instance, created once at import time.
plot_session = PlotSessionManager()


class EnhancedGreenhouseKnowledgeGraph:
    def __init__(self, kg, data_path):
        """
        Set up the enhanced knowledge-graph wrapper.

        Loads the dataset and the spaCy pipeline, builds the static
        configuration and name/node/column mapping tables, attempts the PCMCI
        pre-computation and finally creates the validation helpers.

        Args:
            kg: Base knowledge-graph object. It must expose its graph as
                ``kg.G`` with a ``nodes(data=True)`` iterator yielding
                ``(node, attribute_dict)`` pairs.
            data_path: Passed unchanged to ``self.load_data``.

        Notes:
            * The spaCy model is loaded exactly once, with a download fallback.
              Previously an unguarded load ran first (so the fallback could
              never trigger) and a second load replaced the pipeline after the
              phrase matcher had already been built.
            * The PCMCI pre-computation reads the hard-coded file
              ``filtered_dates.csv``, independent of ``data_path``.
        """
        self.kg = kg
        self.G = self.kg.G  # Make graph directly accessible
        self.data = self.load_data(data_path)

        # Load the NLP pipeline once; download the model on first use if it is missing.
        try:
            self.nlp = spacy.load('en_core_web_sm')
        except OSError:
            print("Downloading spacy model en_core_web_sm...")
            spacy.cli.download("en_core_web_sm")
            self.nlp = spacy.load('en_core_web_sm')

        # Set default ablation mode
        self.ablation_mode = AblationMode.FULL_HCA

        self.config = {
            'analysis_lookback_hours': 6,
            'prediction_horizon_hours': 8,
            'observation_window_hours': 2,  # Default, will be overridden by dynamic lags
            'default_observation_window_minutes': 60,  # Fallback for _analyze_observed_effects
            'control_active_threshold': {
                'uQh': 1E-10,    # A very sensitive threshold for the tiny heat_ref values
                'default': 1E-6  # The standard threshold for other controls
            },
            'lagrangian_active_threshold': {
                # Minimum threshold 1e-4 to avoid numerical noise
                'T_ieq': 1e-4,    # Temperature inequality Lagrangian
                'H_ieq': 1e-4,    # Humidity inequality Lagrangian (was 3.29e-10 - too sensitive)
                'C_ieq': 1e-4,    # CO2 inequality Lagrangian (was 1e-9 - too sensitive)
                'uV_ieq': 1e-4,   # Ventilation control inequality Lagrangian
                'uQc_ieq': 1e-4,  # Cooling control inequality Lagrangian
                'default': 1e-4   # Fallback for any others
            },
            # Thresholds for what counts as a big setpoint change
            'setpoint_change_threshold': {'T': 1.0, 'H': 5.0, 'C': 50.0},
            'control_thresholds': {'change_threshold': 0.001},
            'reasoning_thresholds': {
                'Qrad_high': 500,
                'Tout_cold': 10,
                'Qrad_photosynthesis': 200,
                'Temp_trend_significant': 0.01,
                'humidity_critical_threshold': 85.0,
                'sunrise_spike_window_minutes': 60
            }
        }
        # Create direct access attributes for convenience.
        # NOTE(review): `control_thresholds` is rebound to a per-control dict
        # after the PCMCI step below; only code running before that point sees
        # the `change_threshold` mapping through this attribute.
        self.control_thresholds = self.config['control_thresholds']
        self.reasoning_thresholds = self.config['reasoning_thresholds']

        # --- Variable and Node Mappings (The Single Source of Truth) ---

        # A. User-Friendly Name to KG Base Node Code (for NLP)
        self.name_to_code = {
            'temperature': 'T', 'temp': 'T', 'hot': 'T', 'cold': 'T',
            'humidity': 'H', 'humid': 'H', 'relhum': 'H', 'relhum_ref': 'H',
            'co2': 'C', 'carbon dioxide': 'C', 'co₂': 'C',
            'biomass': 'B', 'bio': 'B',
            'ventilation': 'uV', 'vent': 'uV', 'vents': 'uV',
            'co2 injection': 'uC', 'injection': 'uC',
            'heating': 'uQh', 'heat': 'uQh',
            'cooling': 'uQc', 'cool': 'uQc',
            'solar radiation': 'Qrad', 'radiation': 'Qrad', 'light': 'Qrad',
            'temp_ref': 'T', 'temperature setpoint': 'T', 'temperature reference': 'T',
            'humidity setpoint': 'H', 'humidity reference': 'H',
            'co2 setpoint': 'C', 'co2 reference': 'C', 'carbon dioxide setpoint': 'C',
            'biomass setpoint': 'B', 'biomass reference': 'B',
            'outside temperature': 'Tout',
            'outside co2': 'Cout',
            'outside humidity': 'Hout',
            # Add actual CSV column names to prevent KeyErrors.
            # Map CSV column names to actual KG nodes (not _ref, _eq, _ieq - those don't exist in KG).
            # ('relhum_ref' is already mapped in the humidity row above.)
            'Temp_ref': 'T', 'CO2_ref': 'C', 'Bio_ref': 'B',
            'Vent_ref': 'uV', 'CO2_inj_ref': 'uC', 'heat_ref': 'uQh', 'cool_ref': 'uQc',
            'Teq_lg': 'T', 'Ceq_lg': 'C', 'Heq_lg': 'H', 'Beq_lg': 'B',
            'Tieq_lg': 'T', 'Cieq_lg': 'C', 'Hieq_lg': 'H', 'Bieq_lg': 'B',
            'heat exchange through the greenhouse cover': 'Q_cov',
            'cover heat flow': 'Q_cov',
            'phi_qt_cov': 'Q_cov',
            'transpiration': 'Q_trans',
            'plant sweating': 'Q_trans',
            'phi_qt_trans': 'Q_trans'
        }

        # B. KG Node Name to Data Column Name (PRIMARY MAPPING)
        # This is the most critical map. Every key should be a valid KG node
        # and every value an exact column name from the CSV file.
        self.node_to_column = {
            # States (map to their reference columns as per the data)
            'T': 'Temp_ref',
            'H': 'relhum_ref',
            'H_ref': 'relhum_ref',
            'C': 'CO2_ref',
            'B': 'Bio_ref',
            # State References ('H_ref' is declared next to 'H' above)
            'T_ref': 'Temp_ref', 'C_ref': 'CO2_ref', 'B_ref': 'Bio_ref',
            'fruit_ref': 'fruit_ref', 'prune_ref': 'prune_ref',
            # Controls (map base codes to their data columns)
            'uV': 'Vent_ref',
            'uC': 'CO2_inj_ref',
            'uQh': 'heat_ref',
            'uQc': 'cool_ref',
            # Control References
            'uV_ref': 'Vent_ref', 'uC_ref': 'CO2_inj_ref', 'uQh_ref': 'heat_ref', 'uQc_ref': 'cool_ref',
            # Disturbances
            'Tout': 'Tout', 'Cout': 'Cout', 'Hout': 'Hout', 'Qrad': 'Iout',
            # Flux Variables
            'Q_vent': 'phi_QT_vent', 'Q_sun': 'phi_QT_sun', 'Q_cov': 'phi_QT_cov',
            'Q_trans': 'phi_QT_trans', 'Q_heat': 'phi_QT_heat', 'Q_cool': 'phi_QT_cool',
            'C_inj': 'phi_QCO2_inj', 'C_vent': 'phi_QCO2_vent', 'C_phot': 'Phi_CO2_ac',
            'H_trans': 'phi_H_trans', 'H_vent': 'phi_H_vent', 'H_cov': 'phi_H_cov', 'H_cool': 'phi_H_cool',
            # Lagrangian Multipliers
            'T_ieq': 'Tieq_lg', 'C_ieq': 'Cieq_lg', 'H_ieq': 'Hieq_lg', 'B_ieq': 'Bieq_lg',
            'uV_ieq': 'uVieq_lg', 'uC_ieq': 'uCO2ieq_lg', 'uQh_ieq': 'uHieq_lg', 'uQc_ieq': 'uCieq_lg',
            'T_eq': 'Teq_lg', 'C_eq': 'Ceq_lg', 'H_eq': 'Heq_lg', 'B_eq': 'Beq_lg'
        }

        self.feature_metadata = {
            'Temp_ref': {'name': 'Reference Temperature', 'unit': '°C'},
            'CO2_ref': {'name': 'Reference CO2 Concentration', 'unit': 'ppm'},
            'relhum_ref': {'name': 'Reference Relative Humidity', 'unit': '%'},
            'Bio_ref': {'name': 'Reference Biomass', 'unit': 'kg'},
            'Vent_ref': {'name': 'Reference Ventilation', 'unit': 'm³/h'},
            'CO2_inj_ref': {'name': 'Reference CO2 Injection', 'unit': 'kg/h'},
            'heat_ref': {'name': 'Reference Heating Power', 'unit': 'kW'},
            'cool_ref': {'name': 'Reference Cooling Power', 'unit': 'kW'},
            'cond_ref': {'name': 'Reference Condensation Rate', 'unit': 'kg/h'},
            'fruit_ref': {'name': 'Reference Fruit Yield', 'unit': 'kg'},
            'prune_ref': {'name': 'Reference Pruning Waste', 'unit': 'kg'},
            'Tout': {'name': 'Outside Temperature', 'unit': '°C'},
            'Cout': {'name': 'Outside CO2 Concentration', 'unit': 'ppm'},
            'Hout': {'name': 'Outside Relative Humidity', 'unit': '%'},
            'Iout': {'name': 'Outside Solar Radiation', 'unit': 'W/m²'}
        }

        # C. Derived Mappings (generated automatically, no manual editing needed).
        # Several nodes share one column, so the inverse keeps only the LAST
        # node listed for each column (e.g. 'Temp_ref' -> 'T_ref').
        self.column_to_node = {v: k for k, v in self.node_to_column.items()}
        self.node_to_name = {node: data.get('name', node) for node, data in self.kg.G.nodes(data=True)}
        # Add friendly names for nodes that might not be in the KG explicitly
        self.node_to_name.update({
            'T_ieq': 'Lagrangian Temp Inequality', 'C_ieq': 'Lagrangian CO2 Inequality',
            'H_ieq': 'Lagrangian Humidity Inequality', 'B_ieq': 'Lagrangian Biomass Inequality',
            'uV_ieq': 'Lagrangian Vent Inequality', 'uC_ieq': 'Lagrangian CO2 Inj Inequality',
            'uQh_ieq': 'Lagrangian Heat Inequality', 'uQc_ieq': 'Lagrangian Cool Inequality',
            'T_eq': 'Lagrangian Temp Equality', 'C_eq': 'Lagrangian CO2 Equality',
            'H_eq': 'Lagrangian Humidity Equality', 'B_eq': 'Lagrangian Biomass Equality',
        })

        # D. Reference Node to Base Code (for identifying control actions)
        self.ref_node_to_base_code = {
            'uV_ref': 'uV',
            'uC_ref': 'uC',
            'uQh_ref': 'uQh',
            'uQc_ref': 'uQc'
        }

        # --- System State and Logic Initializations ---
        self.dialogue_state = {}
        self.base_state_variables = ['T', 'C', 'H', 'B']
        self.base_control_variables = ['uV', 'uC', 'uQh', 'uQc']
        self.control_action_nodes = {'uV', 'uC', 'uQh', 'uQc'}

        # This mapping is crucial for linking base codes to their Lagrangians
        self.lagrangian_nodes = {
            'T': {'ieq': 'T_ieq', 'eq': 'T_eq'},
            'C': {'ieq': 'C_ieq', 'eq': 'C_eq'},
            'H': {'ieq': 'H_ieq', 'eq': 'H_eq'},
            'B': {'ieq': 'B_ieq', 'eq': 'B_eq'},
            'uV': {'ieq': 'uV_ieq'},  # No equality constraint for controls
            'uC': {'ieq': 'uC_ieq'},
            'uQh': {'ieq': 'uQh_ieq'},
            'uQc': {'ieq': 'uQc_ieq'}
        }

        # Named "extreme event" queries. 'columns' holds KG node names (not CSV
        # columns). NOTE(review): the last entry uses the plural key
        # 'analysis_variables' while the others use 'analysis_variable' -
        # confirm that consumers handle both.
        self.extreme_event_configs = {
            "maximum heat load": {
                'metric_name': 'net_natural_heat_load',
                'columns': ['Q_sun', 'Q_cov', 'Q_trans'],  # Nodes, not columns
                'operation': 'sum',
                'find': 'idxmax',  # Find the maximum of the sum
                'analysis_variable': 'T',  # Analyze Temperature context at that moment
                'unit': 'W/m²'
            },
            "maximum cooling load": {
                'metric_name': 'net_cooling_demand',
                'columns': ['Q_sun', 'Q_cov', 'Q_trans'],  # Same as heat load
                'operation': 'sum',
                'find': 'idxmax',  # Also the maximum
                'analysis_variable': 'uQc',  # Analyze the COOLING ACTION context
                'unit': 'W/m²'
            },
            "coldest moment": {
                'metric_name': 'temperature',
                'columns': ['T'],  # Just the Temperature node
                'operation': 'identity',  # No operation, just use the column directly
                'find': 'idxmin',  # Find the minimum
                'analysis_variable': 'uQh',  # Analyze the HEATING ACTION context
                'unit': '°C'
            },
            "peak humidity risk": {
                'metric_name': 'humidity',
                'columns': ['H'],  # Just the Humidity node
                'operation': 'identity',
                'find': 'idxmax',  # Find the maximum
                'analysis_variable': 'uV',  # Analyze the VENTILATION context
                'unit': '%'
            },
            "peak photosynthesis": {
                'metric_name': 'photosynthesis_rate',
                'columns': ['C_phot'],  # The CO2 consumption flux
                'operation': 'identity',
                'find': 'idxmin',  # Min because it's a negative flux (consumption)
                'analysis_variable': 'C',  # Analyze the CO2 context
                'unit': 'mg/m²/s'
            },
            "sharp drops in solar radiation": {
                'metric_name': 'solar_radiation',
                'columns': ['Qrad'],  # Solar radiation (Iout)
                'operation': 'identity',
                'find': 'idxmin',  # Find the minimum (deepest drop)
                'analysis_variables': ['uC', 'uQc'],  # Analyze both CO2 injection and cooling systems
                'unit': 'W/m²'
            }
        }

        # Stress scenarios: user keywords, the state/Lagrangian nodes involved,
        # which limit is hit, and the time-of-day window to look at.
        self.stress_signatures = {
            'Heat Stress': {
                'user_keywords': ['heat', 'hot', 'temperature', 'warm'],
                'state_node': 'T',
                'lagrangian_node': 'T_ieq',
                'limit_type': 'UPPER',  # A positive Lagrangian indicates hitting the upper limit
                'time_window': ('11:00', '18:00'),
                'primary_mitigation_system': 'cooling system'
            },
            'Cold Stress': {
                'user_keywords': ['cold', 'cool', 'frost', 'low temperature'],
                'state_node': 'T',
                'lagrangian_node': 'T_ieq',
                'limit_type': 'LOWER',  # A negative Lagrangian indicates hitting the lower limit
                'time_window': ('22:00', '06:00'),
                'primary_mitigation_system': 'heating system'
            },
            'Humidity Stress (High)': {
                'user_keywords': ['humidity', 'humid', 'damp', 'wet', 'disease'],
                'state_node': 'H',
                'lagrangian_node': 'H_ieq',
                'limit_type': 'UPPER',
                'time_window': ('00:00', '23:59'),  # Can happen anytime
                'primary_mitigation_system': 'ventilation or cooling system'
            }
            # More stress types (e.g. 'CO2 Deficiency') can be added here.
        }

        self.constraint_limits = self._initialize_constraint_limits()
        # Per-variable tolerance below which a change is not considered significant.
        self.constraint_tolerance = {
            'T': 0.1,    # e.g., a 0.1 degree change is not significant
            'H': 0.5,    # e.g., a 0.5% humidity change is not significant
            'C': 5.0,    # e.g., a 5 ppm CO2 change is not significant
            'default': 0.01  # A fallback for any other variable
        }
        self._build_kg_matcher()

        # --- Pre-compute Causal Model (PCMCI) ---
        self.pcmci_results = None
        self.pcmci_var_to_idx = {}

        # Best effort: a failure here must not prevent the object from being built.
        try:
            DATA_SOURCE_FILE = "filtered_dates.csv"
            self.precompute_pcmci(data_file_path=DATA_SOURCE_FILE)
        except Exception as e:
            print(f"Warning: PCMCI precomputation failed: {e}. Continuing without causal analysis.")
            self.pcmci_results = None

        # Per-control thresholds; this rebinding replaces the config-backed
        # mapping assigned near the top of this method.
        self.control_thresholds = {
            'uV': 0.1, 'uC': 0.05, 'uQh': 0.1, 'uQc': 0.1
        }

        self.prediction_horizon_steps = 288  # 24 hours * 12 steps/hour = 288 steps
        self.lookback_history_steps = 24

        # Initialize validation components
        self.validator = PhysicalPlausibilityValidator(self)
        self.validation_logger = ValidationLogger()

        # Reported last so the message is only printed once everything above succeeded.
        print("--- Initialization Complete ---")

    def get_control_action_base_code(self, control_action_node: str) -> str:
        """
        Helper to convert a control action node name (e.g., 'uV_ref')
        to its base code (e.g., 'uV'). If already a base code, returns itself.
        """
        return self.ref_node_to_base_code.get(control_action_node, control_action_node)

    def _get_active_hca_components(self) -> Dict[str, bool]:
        """
        Translate the current ablation mode into on/off switches for the three
        HCA layers: 'kkt' (optimizer/KKT), 'physics' (KG/flux) and
        'causal' (PCMCI/correlations).

        When `ablation_mode` is not set, FULL_HCA is assumed; a mode that is
        not listed below enables all three layers.
        """
        current_mode = getattr(self, 'ablation_mode', AblationMode.FULL_HCA)

        # mode -> (kkt, physics, causal)
        switches_by_mode = {
            AblationMode.FULL_HCA:      (True,  True,  True),
            AblationMode.KKT_PHYSICS:   (True,  True,  False),
            AblationMode.KKT_PCMCI:     (True,  False, True),
            AblationMode.PHYSICS_PCMCI: (False, True,  True),
            AblationMode.KKT_ONLY:      (True,  False, False),
            AblationMode.PHYSICS_ONLY:  (False, True,  False),
            AblationMode.CAUSAL_ONLY:   (False, False, True),
            AblationMode.TEMPLATE_ONLY: (False, False, False),
        }
        kkt_on, physics_on, causal_on = switches_by_mode.get(current_mode, (True, True, True))
        return {'kkt': kkt_on, 'physics': physics_on, 'causal': causal_on}

    def _filter_context_by_ablation(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """
        Removes physics/KKT/causal sections from a context dictionary
        when the current ablation configuration disables them.
        """
        if not isinstance(context, dict):
            return context

        components = self._get_active_hca_components()
        filtered_context = copy.deepcopy(context)

        if not components['physics']:
            filtered_context.pop('flux_values', None)
            kg_rels = filtered_context.get('kg_relationships')
            if isinstance(kg_rels, dict):
                kg_rels['incoming_edges'] = []
                kg_rels['outgoing_edges'] = []

        if not components['kkt']:
            filtered_context.pop('optimizer_interpretation', None)
            filtered_context.pop('constraints', None)

        if not components['causal']:
            filtered_context.pop('causal_effects', None)
            filtered_context.pop('correlations', None)

        return filtered_context
 
    def _initialize_constraint_limits(self) -> Dict:
        """
        (Helper) Initializes constraint limits from the MPC formulation.
        
        IMPORTANT: Lagrangian multipliers correspond ONLY to HARD constraints!
        - Hard bounds (min/max): Actual box constraints in MPC → Have Lagrangian multipliers
        - Soft bounds (soft_min/soft_max): Penalty in cost function → NO Lagrangian multipliers
        
        Bounds from plantRef_llm.py:
        - state_hlb/state_hub: HARD constraint bounds (what Lagrangians monitor)
        - state_lb/state_ub: SOFT constraint bounds (penalty in cost, no Lagrangians)
        """
        print("Initializing constraint limits...")
        return {
            # State variables - Temperature (°C)
            'T': {
                'min': 14.0,              # HARD lower bound (state_hlb) - Lagrangian monitors this
                'max': 30.0,              # HARD upper bound (state_hub) - Lagrangian monitors this
                'soft_min': 18.0,         # Soft lower bound (state_lb) - penalty in cost only
                'soft_max': 26.0,         # Soft upper bound (state_ub) - penalty in cost only
                'optimal_range': [18.0, 26.0], 
                'unit': '°C'
            },
            # Humidity (%) - Note: stored as RH%, not absolute humidity
            'H': {
                'min': 10.0,              # HARD lower bound (state_hlb) - Lagrangian monitors this
                'max': 100.0,             # HARD upper bound (state_hub) - Lagrangian monitors this
                'soft_min': 60.0,         # Soft lower bound (state_lb) - penalty in cost only
                'soft_max': 90.0,         # Soft upper bound (state_ub) - penalty in cost only
                'optimal_range': [60.0, 90.0], 
                'unit': '%'
            },
            # CO2 - Note: stored as g/m³ in data (converted from ppm using temperature-dependent formula)
            # Conversion: ppm2gpmc(M_CO2=44.01, ppm, T) as per plantODE_cpl.py
            'C': {
                'min': 560.338,           # HARD lower bound: 300 ppm at 14°C → g/m³
                'max': 1769.214,          # HARD upper bound: 1000 ppm at 30°C → g/m³
                'soft_min': 921.067,      # Soft lower bound: 500 ppm at 18°C → g/m³
                'soft_max': 1613.584,     # Soft upper bound: 900 ppm at 26°C → g/m³
                'optimal_range': [921.067, 1613.584],  # Same as soft bounds
                'unit': 'g/m³',           # Native unit in CSV data
                'ppm_reference': {        # For reference/documentation only
                    'soft_min_ppm': 500,
                    'soft_max_ppm': 900,
                    'hard_min_ppm': 300,
                    'hard_max_ppm': 1000,
                    'note': 'Conversion depends on temperature: higher T → higher g/m³ for same ppm'
                }
            },
            # Biomass (kg)
            'B': {
                'min': 0.0,               # HARD lower bound
                'max': 1000.0,            # HARD upper bound
                'soft_min': 0.0,          # Soft lower bound
                'soft_max': 1000.0,       # Soft upper bound
                'unit': 'kg'
            },
            # Control variables (all normalized 0-1 in MPC, but may be 0-100% in data)
            'uV': {'min': 0.0, 'max': 1.0, 'unit': 'normalized'},   # Ventilation control
            'uC': {'min': 0.0, 'max': 1.0, 'unit': 'normalized'},   # CO2 injection control
            'uQh': {'min': 0.0, 'max': 1.0, 'unit': 'normalized'},  # Heating control
            'uQc': {'min': 0.0, 'max': 1.0, 'unit': 'normalized'}   # Cooling control
        }
    
    def interpret_hard_constraint_lagrangian(self, state_node: str, state_value: float, 
                                             lagrangian_value: float) -> Dict[str, Any]:
        """
        Interpret the Lagrangian multiplier for HARD constraints.
        
        CRITICAL: Lagrangian multipliers correspond ONLY to HARD box constraints, 
        NOT to soft constraint penalties in the cost function.
        
        From KKT conditions:
        - λ ≈ 0:  State is NOT near hard bounds → no constraint pressure
        - λ > 0:  State is near/at UPPER hard bound → constraint is binding
        - λ < 0:  State is near/at LOWER hard bound → constraint is binding
        
        Args:
            state_node: State variable node (e.g., 'T', 'H', 'C')
            state_value: Current value of the state variable  
            lagrangian_value: Actual Lagrangian value from data (e.g., Tieq_lg, Hieq_lg)
            
        Returns:
            Dictionary with interpretation results
        """
        # Get HARD constraint bounds (what Lagrangians actually monitor)
        limits = self.constraint_limits.get(state_node, {})
        hard_lb = limits.get('min', 0.0)
        hard_ub = limits.get('max', 100.0)
        
        # Get threshold for this Lagrangian
        lg_node = self.lagrangian_nodes.get(state_node, {}).get('ieq')
        threshold = self.config['lagrangian_active_threshold'].get(lg_node, 1e-7)
        
        # Determine if Lagrangian is active (significantly non-zero)
        is_active = abs(lagrangian_value) > threshold
        
        # Calculate distance from hard bounds
        distance_to_lb = state_value - hard_lb
        distance_to_ub = hard_ub - state_value
        bound_range = hard_ub - hard_lb
        
        # CRITICAL FIX: Add physical distance validation
        # If state is far from bounds, Lagrangian shouldn't be considered "active"
        # even if it's technically above the threshold (might be numerical noise)
        PHYSICAL_PROXIMITY_THRESHOLD = 0.10  # Must be within 10% of range from bound
        
        physically_near_upper = (distance_to_ub / bound_range) < PHYSICAL_PROXIMITY_THRESHOLD
        physically_near_lower = (distance_to_lb / bound_range) < PHYSICAL_PROXIMITY_THRESHOLD
        
        # Determine which bound (if any) is being approached
        if not is_active:
            status = 'inactive'
            constraint_type = 'none'
            interpretation = f'{state_node} is NOT near hard bounds [{hard_lb}, {hard_ub}]'
            confidence = 'high' if abs(lagrangian_value) < threshold/10 else 'medium'
        elif lagrangian_value > threshold:
            # Positive Lagrangian: pushing against upper bound
            # BUT: only accept if physically close to the bound
            if physically_near_upper:
                status = 'active_upper'
                constraint_type = 'upper'
                interpretation = f'{state_node} is near/at upper hard bound ({hard_ub})'
                # Confidence based on how close to the bound
                if distance_to_ub < 0.05 * bound_range:  # Within 5% of range
                    confidence = 'very_high'
                elif distance_to_ub < 0.1 * bound_range:  # Within 10%
                    confidence = 'high'
                else:
                    confidence = 'medium'
            else:
                # Lagrangian is non-zero but state is far from bound - likely noise
                status = 'inactive'
                constraint_type = 'none'
                pct_from_bound = (distance_to_ub / bound_range) * 100
                interpretation = f'{state_node} is NOT near hard bounds [{hard_lb}, {hard_ub}] (currently {pct_from_bound:.1f}% away from upper bound - Lagrangian value likely numerical noise)'
                confidence = 'high'
                is_active = False  # Override - not physically close
        else:  # lagrangian_value < -threshold
            # Negative Lagrangian: pushing against lower bound
            # BUT: only accept if physically close to the bound
            if physically_near_lower:
                status = 'active_lower'
                constraint_type = 'lower'
                interpretation = f'{state_node} is near/at lower hard bound ({hard_lb})'
                # Confidence based on how close to the bound
                if distance_to_lb < 0.05 * bound_range:
                    confidence = 'very_high'
                elif distance_to_lb < 0.1 * bound_range:
                    confidence = 'high'
                else:
                    confidence = 'medium'
            else:
                # Lagrangian is non-zero but state is far from bound - likely noise
                status = 'inactive'
                constraint_type = 'none'
                pct_from_bound = (distance_to_lb / bound_range) * 100
                interpretation = f'{state_node} is NOT near hard bounds [{hard_lb}, {hard_ub}] (currently {pct_from_bound:.1f}% away from lower bound - Lagrangian value likely numerical noise)'
                confidence = 'high'
                is_active = False  # Override - not physically close
        
        return {
            'state_node': state_node,
            'state_value': state_value,
            'hard_bounds': {'min': hard_lb, 'max': hard_ub},
            'lagrangian_value': lagrangian_value,
            'threshold': threshold,
            'is_active': is_active,
            'status': status,
            'constraint_type': constraint_type,
            'interpretation': interpretation,
            'confidence': confidence,
            'distance_to_lower_bound': distance_to_lb,
            'distance_to_upper_bound': distance_to_ub,
            'shadow_price': abs(lagrangian_value) if is_active else 0.0
        }
    
    def calculate_soft_constraint_penalty(self, state_node: str, state_value: float) -> Dict[str, Any]:
        """
        Calculate the EXACT soft constraint penalty using the MPC penalty function.
        
        From plantODE_cpl.py:
        def penalty(self, x, xMin, xMax, c):
            beta = 10**-4
            P = c / 2 * (np.sqrt((xMin - x)**2 + beta) + np.sqrt((xMax - x)**2 + beta) - (xMax - xMin))
            return P
        
        Where:
        - x: current state value
        - xMin, xMax: soft constraint bounds
        - c: penalty weight (penT, penH, penC)
        - beta: small constant for smoothness
        
        If P ≈ 0: state is within soft bounds
        If P ≈ c: state is near or outside soft bounds
        
        Args:
            state_node: State variable node (e.g., 'T', 'H', 'C')
            state_value: Current value of the state variable
            
        Returns:
            Dictionary with penalty calculation and interpretation
        """
        # Get soft constraint bounds
        limits = self.constraint_limits.get(state_node, {})
        soft_lb = limits.get('soft_min', limits.get('min', 0.0))
        soft_ub = limits.get('soft_max', limits.get('max', 100.0))
        
        # Penalty weights from plantODE_cpl.py
        penalty_weights = {
            'T': 0.05,   # penT
            'H': 1.17,   # penH  
            'C': 0.01    # penC
        }
        c = penalty_weights.get(state_node, 0.01)
        
        # Calculate penalty using the EXACT MPC formula
        beta = 1e-4
        x = state_value
        xMin = soft_lb
        xMax = soft_ub
        
        P = (c / 2) * (
            np.sqrt((xMin - x)**2 + beta) + 
            np.sqrt((xMax - x)**2 + beta) - 
            (xMax - xMin)
        )
        
        # Interpret the penalty
        # P ≈ 0 means within bounds
        # P ≈ c means at/beyond bounds
        penalty_ratio = P / c if c > 0 else 0.0
        
        # Determine which bound is being violated (if any)
        distance_to_lower = abs(x - xMin)
        distance_to_upper = abs(x - xMax)
        
        # Check if penalty is significant (> 1% of max penalty)
        penalty_threshold = 0.01 * c
        is_penalty_active = P > penalty_threshold
        
        if not is_penalty_active:
            status = 'satisfied'
            violation_type = None
            interpretation = f'{state_node} = {x:.2f} within soft bounds [{xMin}, {xMax}]. Penalty P = {P:.6f} ≈ 0 (no cost)'
        else:
            # Determine if near lower or upper bound
            if distance_to_lower < distance_to_upper:
                violation_type = 'lower'
                bound_value = xMin
                distance = xMin - x  # Positive if below bound
            else:
                violation_type = 'upper'
                bound_value = xMax
                distance = x - xMax  # Positive if above bound
            
            status = 'violated'
            
            # Classify severity based on penalty ratio
            if penalty_ratio > 0.9:  # P > 90% of max
                severity = 'severe'
                interpretation = f'{state_node} = {x:.2f} SEVERELY violates soft {violation_type} bound ({bound_value}). Penalty P = {P:.6f} ≈ {penalty_ratio*100:.0f}% of max ({c:.2f})'
            elif penalty_ratio > 0.5:  # P > 50% of max
                severity = 'moderate'
                interpretation = f'{state_node} = {x:.2f} MODERATELY violates soft {violation_type} bound ({bound_value}). Penalty P = {P:.6f} ≈ {penalty_ratio*100:.0f}% of max ({c:.2f})'
            else:  # P > threshold but < 50% of max
                severity = 'mild'
                interpretation = f'{state_node} = {x:.2f} MILDLY violates soft {violation_type} bound ({bound_value}). Penalty P = {P:.6f} ≈ {penalty_ratio*100:.0f}% of max ({c:.2f})'
        
        return {
            'state_node': state_node,
            'state_value': x,
            'soft_bounds': {'min': xMin, 'max': xMax},
            'penalty_weight': c,
            'penalty_value': P,
            'penalty_ratio': penalty_ratio,
            'penalty_threshold': penalty_threshold,
            'is_penalty_active': is_penalty_active,
            'status': status,
            'violation_type': violation_type,
            'severity': severity if is_penalty_active else None,
            'interpretation': interpretation,
            'distance_to_lower': distance_to_lower,
            'distance_to_upper': distance_to_upper
        }
    
    def check_soft_constraint_violation(self, state_node: str, state_value: float) -> Dict[str, Any]:
        """
        Report the status of a SOFT constraint for a single state variable.

        Soft constraints enter the cost function as penalties and carry no
        Lagrangian multiplier, so the assessment is based on the state value
        alone. This method is a convenience entry point: it forwards both
        arguments unchanged to calculate_soft_constraint_penalty() and hands
        back whatever that method produces.

        Args:
            state_node: State variable node (e.g., 'T', 'H', 'C')
            state_value: Current value of the state variable

        Returns:
            The dictionary returned by calculate_soft_constraint_penalty()
        """
        # Delegate entirely; no additional interpretation happens here.
        penalty_report = self.calculate_soft_constraint_penalty(state_node, state_value)
        return penalty_report
    
    def _build_kg_matcher(self):
        """
        Create a case-insensitive spaCy PhraseMatcher over KG node names, short node IDs and aliases.

        NOTE(review): a second method named `_build_kg_matcher` is defined further
        down in this class. Because it is defined later, that definition replaces
        this one on the class, so this body is not the one that runs.

        Side effects:
            Resets self.node_id_map and fills it with lower-cased phrase -> KG node ID.

        Returns:
            The populated PhraseMatcher (all patterns registered under "KG_NODES").

        Raises:
            ValueError: if the spaCy model or the Knowledge Graph is missing/empty.
        """
        if not getattr(self, 'nlp', None):
            raise ValueError("spaCy NLP model not loaded. Load it in __init__.")
        if not hasattr(self, 'kg') or not self.kg.G:
            raise ValueError("Knowledge Graph not available.")

        phrase_matcher = PhraseMatcher(self.nlp.vocab, attr="LOWER")
        self.node_id_map = {}  # lower-cased phrase text -> KG node ID
        term_to_node = self.node_id_map

        def register(doc, target_node):
            # Register one phrase and remember which node it stands for.
            phrase_matcher.add("KG_NODES", [doc], on_match=None)
            term_to_node[doc.text.lower()] = target_node

        # Pass 1: the display name of every KG node, plus the node ID when it is a short code.
        for node_id, attrs in self.kg.G.nodes(data=True):
            label = attrs.get('name', node_id)
            if not label:
                continue
            register(self.nlp.make_doc(str(label)), node_id)  # str() guards non-string names

            is_short_code = node_id != label and isinstance(node_id, str) and len(node_id) < 5
            if not is_short_code:
                continue
            id_doc = self.nlp.make_doc(node_id)
            # Do not let a bare ID overwrite a phrase that is already some node's name.
            if id_doc.text.lower() not in term_to_node:
                register(id_doc, node_id)

        # Pass 2: aliases from name_to_code, kept only when they resolve to a KG node.
        for alias, code in self.name_to_code.items():
            if code in self.kg.G:
                # The code is itself a node (e.g., 'T', 'Qrad').
                target = code
            else:
                # Otherwise resolve code -> column -> node.
                column = self.code_to_column.get(code)
                target = self.column_to_node.get(column) if column else None

            if target and alias.lower() not in term_to_node:
                register(self.nlp.make_doc(str(alias)), target)

        print(f"Built KG Matcher with {len(self.node_id_map)} terms.")
        return phrase_matcher

    def find_nearest_timestamp(self, timestamp: datetime) -> datetime:
        """
        Return the entry of self.data.index that lies closest to `timestamp`.

        An exact hit returns `timestamp` itself. Otherwise the insertion point
        from searchsorted() is used to pick between the two neighbouring index
        entries; on an exact tie the earlier entry wins. Values before the first
        or after the last entry clamp to that end of the index.

        Returns None when the index is empty.
        """
        index = self.data.index
        size = len(index)
        if size == 0:
            return None

        if timestamp in index:
            return timestamp

        # searchsorted gives the position at which `timestamp` would be inserted.
        insert_at = index.searchsorted(timestamp)
        if insert_at == 0:
            # Earlier than everything in the index.
            return index[0]
        if insert_at >= size:
            # Later than everything in the index.
            return index[-1]

        earlier, later = index[insert_at - 1], index[insert_at]
        gap_to_earlier = abs((timestamp - earlier).total_seconds())
        gap_to_later = abs((timestamp - later).total_seconds())
        return earlier if gap_to_earlier <= gap_to_later else later

    # Helper to update dialogue state
    def update_dialogue_state(self, original_query: str,
                              resolved_intent: str, # Keep resolved_intent as fallback
                              resolved_primary_node: str | None,
                              resolved_variable_code: str | None,
                              resolved_query_term: str | None,
                              resolved_timestamp: datetime | None,
                              executed_intent: Literal['time_range','timestamp', 'pattern', 'correlation', 'explain_specific_event', 'evaluate_control_strategy', 'explain_net_effect', 'analyze_volatility','explain_causal_anomaly',
                                                       'explain_sudden_event','analyze_profitability_strategy','explain_causal_trend','evaluate_day_strategy','analyze_model_discrepancy','explain_control_action','visualize','explain_system_event', 'find_and_analyze_event', 'explain_obstacle', 'explain_strategy',
                                                      'explain_gradual_trend','evaluate_setpoint_strategy','find_and_explain_reaction','analyze_daily_total','explain_trade_off','explain_anomaly','explain_disturbance_significance','analyze_control_saturation'] | str | None = None,
                              pattern_type: str | None = None, # Add executed_intent as an optional parameter
                              plot_generated: bool = False,
                              resolved_date: date | None = None,
                              resolved_time: Optional[datetime.time] = None
                              ):
        """
        Updates the dialogue state with the final, resolved information for the turn.
        Prioritizes storing the executed intent if available, otherwise stores the resolved intent.
        """
        if not hasattr(self, 'dialogue_state'):
            self.dialogue_state = {} # Initialize if missing
            print("Initialized dialogue_state inside update function.") # Debug print

        self.dialogue_state['last_query'] = original_query

        # Store the executed intent if it was determined (i.e., an analysis path was taken)
        # Otherwise, fall back to storing the resolved intent from parsing
        self.dialogue_state['last_intent'] = executed_intent if executed_intent is not None else resolved_intent


        # Store the final resolved node/variable/term information
        self.dialogue_state['last_primary_node'] = resolved_primary_node
        self.dialogue_state['last_variable_code'] = resolved_variable_code
        self.dialogue_state['last_query_term'] = resolved_query_term

        # Store the final resolved timestamp and its components
        self.dialogue_state['last_timestamp'] = resolved_timestamp
        if resolved_timestamp:
            self.dialogue_state['last_date'] = resolved_timestamp.date()
            self.dialogue_state['last_time'] = resolved_timestamp.time()
        else:
            # Update with provided date/time if timestamp not available
            if resolved_date:
                self.dialogue_state['last_date'] = resolved_date
            if resolved_time:
                self.dialogue_state['last_time'] = resolved_time

        # Add logic to store pattern_type if provided
        if pattern_type is not None: # Only store if a pattern_type was passed
            self.dialogue_state['last_pattern_type'] = pattern_type
        elif 'last_pattern_type' in self.dialogue_state: # Clear if not provided in this call
            del self.dialogue_state['last_pattern_type'] # Or set to None: self.dialogue_state['last_pattern_type'] = None

        self.dialogue_state['plot_generated'] = plot_generated
        if plot_generated:
            self.dialogue_state['last_plot_node'] = resolved_primary_node
            self.dialogue_state['last_plot_date'] = resolved_timestamp.date() if resolved_timestamp else None
        # 'last_detected_events' is updated within explain_pattern when it runs,
        # so no need to manage it here unless you want to clear it on certain conditions.

        print(f"Dialogue state updated. Last node: {self.dialogue_state.get('last_primary_node')}, Last time: {self.dialogue_state.get('last_timestamp')}, Last intent: {self.dialogue_state.get('last_intent')}") # Print the stored intent


    def clear_dialogue_state(self):
        """
        Reset the dialogue state: every existing key is kept but its value becomes None.

        If dialogue_state has not been created yet (update_dialogue_state() also
        allows for that case) or is None, it is initialised to an empty dict
        instead of raising.
        """
        existing_state = getattr(self, 'dialogue_state', None) or {}
        self.dialogue_state = dict.fromkeys(existing_state)
        print("Dialogue state cleared.")

    def _build_kg_matcher(self):
        """
        Build the spaCy PhraseMatcher used for KG entity extraction.

        NOTE(review): this class defines `_build_kg_matcher` twice. This definition
        comes later, so it is the one the class ends up with; the earlier one
        is shadowed and should be removed or merged.

        Every key of self.name_to_code becomes a case-insensitive phrase pattern,
        registered under the mapped code as match key.

        Side effects:
            Sets self.kg_matcher and rebuilds self.node_id_map
            (lower-cased phrase -> mapped code).

        Returns:
            The PhraseMatcher that was also stored on self.kg_matcher. The shadowed
            definition returned its matcher, so returning it here keeps callers
            that assign the result working; callers that ignore it are unaffected.
        """
        print("Building Knowledge Graph Phrase Matcher...")

        # attr='LOWER' makes matching case-insensitive.
        self.kg_matcher = PhraseMatcher(self.nlp.vocab, attr='LOWER')
        self.node_id_map = {}

        for user_friendly_name, node_id in self.name_to_code.items():
            pattern = self.nlp.make_doc(user_friendly_name)

            # The mapped code doubles as the match key, so a match can be traced
            # back to its code without a separate lookup.
            self.kg_matcher.add(node_id, [pattern])
            self.node_id_map[user_friendly_name.lower()] = node_id

        print("KG Phrase Matcher built successfully.")
        return self.kg_matcher


    def extract_kg_entities(self, query: str) -> list[tuple[str, int]]:
        """
        Find Knowledge Graph entities mentioned in `query`.

        Search terms are collected from self.name_to_code, the 'name' attribute of
        the nodes in self.G, and the node IDs / column names in self.node_to_column.
        Short purely alphabetic terms (<= 3 characters) must appear as whole words;
        all other terms are matched as plain substrings. Matching is done on a
        lower-cased copy of the query in which '₂' is replaced by '2', and terms
        are normalised the same way.

        When several sources supply the same term, the source processed last wins
        (order: name_to_code, graph names, node_to_column), so the result is the
        same on every run. Empty, whitespace-only and non-string terms are ignored.

        Args:
            query: Raw user query.

        Returns:
            List of (node_id, start_offset) tuples ordered by position in the
            normalised query. Overlapping matches are resolved in favour of the
            earliest start and, for equal starts, the longest match.
        """
        print("\n--- Running Entity Extraction ---")

        normalized_query = query.lower().replace('₂', '2')
        print(f"DEBUG (Extractor): Normalized Query: '{normalized_query}'")

        # --- 1. Collect (term, node) pairs from all known sources ---
        all_sources = list(self.name_to_code.items())
        for node, data in self.G.nodes(data=True):
            if 'name' in data:
                all_sources.append((data['name'], node))
        for node, col in self.node_to_column.items():
            all_sources.append((node, node))
            all_sources.append((col, node))

        strict_map = {}    # short, ambiguous terms that need word boundaries
        flexible_map = {}  # longer or non-alphabetic terms, matched as substrings

        # Iterate in source order (not via a set) so term collisions resolve
        # deterministically instead of depending on hash ordering.
        for term, node_id in all_sources:
            if not isinstance(term, str):
                continue
            # Apply the same normalisation as the query, otherwise a term
            # containing '₂' could never match.
            term_key = term.lower().replace('₂', '2')
            # An empty term would "match" at every position of the query.
            if not term_key.strip():
                continue
            if len(term_key) <= 3 and term_key.isalpha():
                strict_map[term_key] = node_id
            else:
                flexible_map[term_key] = node_id

        # --- 2. Find all candidate matches ---
        all_matches = []

        # A) Strict terms (e.g., 't', 'c'): whole-word regex match
        for term, node_id in strict_map.items():
            pattern = r'\b' + re.escape(term) + r'\b'
            for match in re.finditer(pattern, normalized_query):
                all_matches.append({"node": node_id, "start": match.start(), "end": match.end()})

        # B) Flexible terms (e.g., 'temperature', 'phi_qt_cool'): every substring occurrence
        for term, node_id in flexible_map.items():
            pos = normalized_query.find(term)
            while pos != -1:
                all_matches.append({"node": node_id, "start": pos, "end": pos + len(term)})
                pos = normalized_query.find(term, pos + 1)

        if not all_matches:
            print("DEBUG (Extractor): No entities found.")
            return []

        # --- 2.5. Drop matches that are really temperature units ('°c', '°f') ---
        def looks_like_unit(match):
            text = normalized_query[match['start']:match['end']]
            return (
                text in ('c', 'f')
                and match['start'] > 0
                and normalized_query[match['start'] - 1] == '°'
            )

        filtered_matches = [m for m in all_matches if not looks_like_unit(m)]

        # --- 3. Resolve overlaps: earliest start first, longest match first ---
        sorted_matches = sorted(filtered_matches, key=lambda m: (m['start'], -(m['end'] - m['start'])))

        final_matches = []
        last_end_position = -1
        for match in sorted_matches:
            if match['start'] >= last_end_position:
                final_matches.append((match['node'], match['start']))
                last_end_position = match['end']

        print(f"DEBUG (Extractor): Final extracted entities: {final_matches}")
        return final_matches
    

    def parse_temporal_expressions(self, query: str) -> Dict[str, Any]:
        """
        Extract a primary date and any clock times from a free-text query.

        Date resolution, in order of precedence:
          1. A date written with a year ("August 24, 2011", "2011-08-24") is parsed as is.
          2. A month/day without a year ("Aug 24th") is parsed and its year forced to 2011,
             because the data is from 2011.
          3. Otherwise dateparser's search_dates() is tried on the whole query; a result
             in the current year is moved to 2011.
          4. If none of the date patterns matched at all, the 'last_date' stored in
             self.dialogue_state (when present) replaces whatever step 3 produced.

        Time resolution:
          Tokens such as "19:00", "7 pm" or a bare hour 13-23 are treated as times.
          Bare numbers that lie inside a matched date (the "15" in "August 15" or
          "2011-08-15") are not treated as hours. A bare hour is read as HH:00.

        Args:
            query: Raw user query.

        Returns:
            {'datetimes': [{'text': <matched time text>, 'datetime': <datetime>}, ...],
             'primary_date': <date or None>}
            'datetimes' is only filled when a primary date is known.
        """
        from datetime import datetime
        print("DEBUG (Parser) - Using Robust Hybrid Parser v11.1.")
        mdy_pattern_with_year = r'\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2}(?:st|nd|rd|th)?(?:,)?\s*\d{4})\b'
        dmy_pattern = r'\b(\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{4})\b'
        ymd_pattern = r'\b(\d{4}[-/]\d{1,2}[-/]\d{1,2})\b'
        time_pattern = r'\b(\d{1,2}(?::\d{2})?\s*(?:AM|PM|am|pm)?)\b'
        day_month_pattern_no_year = r'\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2}(?:st|nd|rd|th)?)\b'

        def find_hits(pattern):
            # (matched text, (start, end)) for every occurrence of `pattern` in the query.
            return [(m.group(1), m.span(1)) for m in re.finditer(pattern, query, re.IGNORECASE)]

        dmy_hits = find_hits(dmy_pattern)
        mdy_year_hits = find_hits(mdy_pattern_with_year)
        ymd_hits = find_hits(ymd_pattern)
        no_year_hits = find_hits(day_month_pattern_no_year)

        dates_with_year = [text for text, _ in mdy_year_hits + ymd_hits]
        dates_without_year = [text for text, _ in no_year_hits]
        # Character ranges covered by any date expression; used to keep day numbers
        # from being mistaken for hours, and to know whether a date was mentioned.
        date_spans = [span for _, span in dmy_hits + mdy_year_hits + ymd_hits + no_year_hits]
        date_mentioned = bool(date_spans)

        # Collect time candidates. Tokens with a colon or AM/PM are always kept;
        # bare numbers are kept only when they look like a 24h hour (13-23) and
        # are not part of a date expression.
        found_times_str = []
        for m in re.finditer(time_pattern, query, re.IGNORECASE):
            t_str = m.group(1)
            token = t_str.strip()
            if token.isdigit():
                t_start = m.start(1)
                t_end = t_start + len(token)
                inside_date = any(t_start < d_end and d_start < t_end for d_start, d_end in date_spans)
                if inside_date or not 13 <= int(token) <= 23:
                    continue
            found_times_str.append(t_str)

        primary_date = None
        if dates_with_year:
            parsed_dt_obj = dateparser.parse(dates_with_year[0])
            if parsed_dt_obj:
                primary_date = parsed_dt_obj.date()

        # If no year was specified, parse the date and MANUALLY set the year to 2011
        elif dates_without_year:
            # dateparser handles forms like "August 24th" but defaults to the current year.
            parsed_dt_obj = dateparser.parse(dates_without_year[0])
            if parsed_dt_obj:
                try:
                    primary_date = parsed_dt_obj.date().replace(year=2011)
                except ValueError:
                    # Feb 29 parsed in a leap year has no counterpart in 2011.
                    print(f"DEBUG (Parser): '{dates_without_year[0]}' does not exist in 2011.")
        else:
            # Fallback: Use dateparser.search_dates on the full query
            try:
                search_results = search_dates(query)
                if search_results:
                    # Take the first found date
                    parsed_dt_obj = search_results[0][1]
                    if parsed_dt_obj:
                        primary_date = parsed_dt_obj.date()
                        # If no year, assume 2011
                        if primary_date.year == datetime.now().year and primary_date.year != 2011:
                            primary_date = primary_date.replace(year=2011)
            except Exception as e:
                print(f"DEBUG (Fallback Parser): search_dates failed: {e}")

        # If no date was mentioned in the query, use the last date from dialogue state
        if not date_mentioned and hasattr(self, 'dialogue_state') and self.dialogue_state.get('last_date'):
            primary_date = self.dialogue_state['last_date']

        result = {'datetimes': [], 'primary_date': primary_date}

        if primary_date and found_times_str:
            for t_str in found_times_str:
                # The regex may capture trailing whitespace, which strptime rejects.
                token = t_str.strip()
                time_obj = None
                # '%H' turns a bare 24h hour ("15") into 15:00.
                for fmt in ('%H:%M', '%H'):
                    try:
                        time_obj = datetime.strptime(token, fmt).time()
                        break
                    except ValueError:
                        continue
                if time_obj is None:
                    # Anything else (e.g. AM/PM forms) is left to dateparser; best effort.
                    try:
                        parsed_time = dateparser.parse(token)
                    except Exception as e:
                        print(f"DEBUG (Parser): could not parse time '{token}': {e}")
                        continue
                    if not parsed_time:
                        continue
                    time_obj = parsed_time.time()
                full_dt = datetime.combine(primary_date, time_obj)
                result['datetimes'].append({'text': t_str, 'datetime': full_dt})

        print(f"DEBUG (Robust Parser) - Final result: {result}")
        return result

    
    def visualize_data(self, primary_node: str, target_date: datetime.date, secondary_nodes: List[str] = None) -> "go.Figure | str":
        """
        Build an interactive Plotly figure of one or more variables for a single day.

        With a single plottable variable a single chart is produced. With several,
        each variable gets its own x/y axis pair laid out in one row of columns
        (primary variable first). The x-axis spans the day from 00:00 to 23:59
        with a tick every 3 hours.

        Args:
            primary_node: Node of the main variable to plot.
            target_date: Day to plot.
            secondary_nodes: Optional additional nodes to plot next to the primary one.

        Returns:
            The Plotly Figure on success; otherwise a plain message string
            ("Error: ..." or "No data available for <date>.").
        """
        from datetime import datetime, time
        if self.data is None or self.data.empty:
            return "Error: Data not loaded or is empty."

        # Prepare list of nodes to plot
        nodes_to_plot = [primary_node]
        if secondary_nodes:
            nodes_to_plot.extend(secondary_nodes)

        # Keep only nodes that map to an existing data column
        valid_nodes = []
        for node in nodes_to_plot:
            feature_key = self.node_to_column.get(node)
            if not feature_key or feature_key not in self.data.columns:
                print(f"Warning: Could not find data for '{self.node_to_name.get(node, node)}' (internal code: {node}). Skipping.")
                continue
            valid_nodes.append(node)

        if not valid_nodes:
            return "Error: Could not find data for any of the requested variables."

        start_of_day = pd.Timestamp(datetime.combine(target_date, time(0, 0, 0)))
        end_of_day = pd.Timestamp(datetime.combine(target_date, time(23, 59, 59)))

        # Match the index's timezone (if any) so slicing compares like with like.
        # The day boundaries are taken in the index's own timezone, not assumed to be UTC.
        index_tz = getattr(self.data.index, 'tz', None)
        if index_tz is not None:
            start_of_day = start_of_day.tz_localize(index_tz)
            end_of_day = end_of_day.tz_localize(index_tz)

        df_day = self.data.loc[start_of_day:end_of_day]

        if df_day.empty:
            return f"No data available for {target_date.strftime('%Y-%m-%d')}."

        # Shared x-axis settings. Both range ends derive from start_of_day so that
        # they carry the same timezone information.
        tick_values = [start_of_day + timedelta(hours=h) for h in range(0, 24, 3)]
        axis_range = [start_of_day, start_of_day + timedelta(hours=23, minutes=59)]

        def describe(node):
            # Column plus display name/unit, falling back to the column name and no unit.
            column = self.node_to_column[node]
            metadata = self.feature_metadata.get(column, {})
            return column, metadata.get('name', column), metadata.get('unit', '')

        def build_trace(node, **axis_refs):
            # One line trace for `node`; axis_refs optionally binds it to a subplot axis pair.
            column, feature_name, feature_unit = describe(node)
            return go.Scatter(
                x=df_day.index.tolist(),
                y=df_day[column],
                mode='lines',
                name=feature_name,
                hovertemplate='Time: %{x|%H:%M}<br>' +
                              f'{feature_name}: ' +
                              '%{y:.2f} ' + feature_unit +
                              '<extra></extra>',
                **axis_refs
            )

        num_plots = len(valid_nodes)
        fig = go.Figure()

        if num_plots == 1:
            # Single plot
            node = valid_nodes[0]
            _, feature_name, feature_unit = describe(node)
            fig.add_trace(build_trace(node))
            fig.update_layout(
                title=f'{feature_name} on {target_date.strftime("%Y-%m-%d")}',
                xaxis_title='Time of Day',
                yaxis_title=f'{feature_name} ({feature_unit})',
                hovermode="x unified",
                xaxis=dict(
                    tickmode='array',
                    tickvals=tick_values,
                    tickformat='%H:%M',
                    range=axis_range
                ),
            )
            return fig

        # Multi-plot layout: the first trace uses 'x'/'y', later ones 'x2'/'y2', 'x3'/'y3', ...
        for i, node in enumerate(valid_nodes):
            suffix = '' if i == 0 else str(i + 1)
            fig.add_trace(build_trace(node, xaxis=f'x{suffix}', yaxis=f'y{suffix}'))

        fig.update_layout(
            title=f'Variable Comparison on {target_date.strftime("%Y-%m-%d")}',
            hovermode="x unified",
            grid=dict(rows=1, columns=num_plots, pattern='independent'),
        )

        # Configure each subplot's axes
        for i, node in enumerate(valid_nodes):
            suffix = '' if i == 0 else str(i + 1)
            _, feature_name, feature_unit = describe(node)
            is_last = i == num_plots - 1
            fig.update_layout({
                f'xaxis{suffix}': dict(
                    tickmode='array',
                    tickvals=tick_values,
                    tickformat='%H:%M',
                    range=axis_range,
                    showticklabels=is_last,  # Tick labels and axis title only on the last subplot
                    title='Time of Day' if is_last else ''
                ),
                f'yaxis{suffix}': dict(
                    title=f'{feature_name} ({feature_unit})',
                    showticklabels=True
                ),
            })

        return fig

    def analyze_relationship(self, var1_node: str, var2_node: str, date: datetime.date, time_obj: Optional[datetime.time] = None) -> Dict[str, Any]:
        """
        (UPGRADED - EXPERT CORRELATOR)
        Analyzes the statistical and physical relationship between two variables.
        """
        from datetime import datetime
        print(f"DEBUG - Expert Analysis of relationship between '{var1_node}' and '{var2_node}' for {date}.")
        
        var1_name = self.node_to_name.get(var1_node, var1_node)
        var2_name = self.node_to_name.get(var2_node, var2_node)
        
        analysis_result = {
            'var1_name': var1_name,
            'var2_name': var2_name,
            'conclusion': f"Analysis of the relationship between {var1_name} and {var2_name} could not be completed.",
            'evidence': {}
        }

        try:
            # --- NEW TIME-AWARE LOGIC ---
            if time_obj:
                # A specific time was mentioned, so we analyze a local window.
                print(f"DEBUG - Time provided ({time_obj}). Performing LOCAL correlation analysis.")
                center_timestamp = datetime.combine(date, time_obj)
                window_start = center_timestamp - timedelta(minutes=15)
                window_end = center_timestamp + timedelta(minutes=15)
                analysis_data = self.data.loc[window_start:window_end].copy()
                analysis_scope = f"in the 30-minute window around {time_obj.strftime('%H:%M')}"
            else:
                # No time mentioned, analyze the full day.
                print("DEBUG - No time provided. Performing FULL DAY correlation analysis.")
                day_start = datetime.combine(date, time.min)
                day_end = datetime.combine(date, time.max)
                analysis_data = self.data.loc[day_start:day_end].copy()
                analysis_scope = "over the course of the day"
            
            if analysis_data.empty: return {'error': f"No data available for the specified period."}
        
            
        except Exception as e: return {'error': f"Error slicing data: {e}"}

        var1_col = self.node_to_column.get(var1_node)
        var2_col = self.node_to_column.get(var2_node)
        if not all(col and col in analysis_data.columns for col in [var1_col, var2_col]):
            return {'error': "Missing data columns for one or both variables."}

        # 1. Calculate correlation on the appropriate data slice (either window or full day)
        clean_data = analysis_data[[var1_col, var2_col]].dropna()
        if len(clean_data) < 2:
            return {'error': "Not enough overlapping data to calculate a correlation."}
            
        correlation = clean_data[var1_col].corr(clean_data[var2_col])
        
        # 2. Interpret the correlation
        strength = "strong" if abs(correlation) > 0.7 else "moderate" if abs(correlation) > 0.4 else "weak"
        direction = "positive" if correlation > 0 else "negative"
        correlation_summary = f"**{analysis_scope}**, the data shows a **{strength} {direction} relationship**, with a correlation coefficient of **{correlation:.2f}**."
        relationship_info = self.kg.find_path(var1_node, var2_node) 
        
        kg_explanation = "No clear physical relationship was found in the Knowledge Graph." # Default message

        # Now, check the 'type' key from the returned dictionary
        if relationship_info['type'] == 'direct':
            path_str = " -> ".join(relationship_info['path'])
            kg_explanation = f"The knowledge graph shows a **direct causal path**: {path_str}. This indicates that '{var1_name}' directly influences '{var2_name}'."

        elif relationship_info['type'] == 'common_successor':
            common_node = relationship_info['common_node']
            common_node_name = self.node_to_name.get(common_node, common_node)

            # --- NEW CONTEXT-AWARE EXPLANATION LOGIC ---
            node1_type = self.kg.G.nodes[var1_node].get('type')
            node2_type = self.kg.G.nodes[var2_node].get('type')

            # Identify which is the control and which is the flux/disturbance
            control_name, force_name = (var1_name, var2_name) if node1_type in ['Control', 'Reference'] else (var2_name, var1_name)

            # Generate a highly specific explanation based on this context
            kg_explanation = f"The knowledge graph reveals a direct causal relationship. The **'{force_name}'** represents a physical force (like heat loss) that directly affects the **'{common_node_name}'**. The **'{control_name}'** is the specific control action the system uses to counteract this physical force and maintain stability. The strong negative correlation is expected, as the controller increases the action in direct opposition to the force."
            
        elif relationship_info['type'] == 'common_predecessor':
            common_node = relationship_info['common_node']
            common_node_name = self.node_to_name.get(common_node, common_node)
            kg_explanation = f"The knowledge graph explains this correlation. Both **'{var1_name}'** and **'{var2_name}'** are not directly causing each other; instead, they are both caused by a common external driver: **'{common_node_name}'**."

        analysis_result['conclusion'] = correlation_summary
        analysis_result['evidence']['Statistical Finding'] = correlation_summary
        analysis_result['evidence']['Physical Link'] = kg_explanation

        # --- 4. (NEW) Search for Overriding Factors if the relationship is unexpectedly weak ---
        is_control_disturbance_pair = (self.kg.G.nodes[var1_node].get('type') == 'Disturbance' and self.kg.G.nodes[var2_node].get('type') == 'Control') or \
                                    (self.kg.G.nodes[var2_node].get('type') == 'Disturbance' and self.kg.G.nodes[var1_node].get('type') == 'Control')

        # This logic is specific to the Ventilation/Humidity/Temperature trade-off but can be generalized
        if is_control_disturbance_pair and strength == 'weak':
            control_node = var2_node if self.kg.G.nodes[var2_node].get('type') == 'Control' else var1_node
            disturbance_node = var1_node if control_node == var2_node else var2_node

            if control_node == 'uV' and disturbance_node == 'Hout':
                tout_col = self.node_to_column.get('Tout')
                # Find the period when the disturbance was most favorable for action
                favorable_period = day_data[day_data[disturbance_col] < day_data[disturbance_col].quantile(0.25)] # Bottom 25% humidity
                if not favorable_period.empty and tout_col in favorable_period.columns:
                    avg_tout_during_favorable_period = favorable_period[tout_col].mean()
                    
                    if avg_tout_during_favorable_period > 25: # Threshold for "hot"
                        overriding_factor_summary = f"This weak correlation is not a sign of a poor strategy, but a **deliberate trade-off**. During the driest part of the day, the outside temperature was very high (averaging {avg_tout_during_favorable_period:.1f}°C). The controller correctly prioritized **temperature stability** over opportunistic dehumidification, avoiding the massive heat load that ventilation would have introduced."
                        analysis_result['conclusion'] = f"The controller was **not** using periods of dry outside air for dehumidification because it made a strategic trade-off to prioritize temperature control."
                        analysis_result['evidence']['Overriding Factor'] = overriding_factor_summary

        # Check for the specific "Ventilation vs. Outside Humidity" weak correlation scenario
        is_vent_hout_pair = ('uV' in [var1_node, var2_node] and 'Hout' in [var1_node, var2_node])
        
        if is_vent_hout_pair and strength == 'weak':
            print("DEBUG (Relationship Expert): Detected weak Vent/Hout correlation. Checking for temperature trade-off...")
            
            # We need the full day's data to find the driest period, even if it was a local analysis.
            day_start = datetime.combine(date, time.min)
            day_end = datetime.combine(date, time.max)
            day_data = self.data.loc[day_start:day_end]

            hout_col = self.node_to_column.get('Hout')
            tout_col = self.node_to_column.get('Tout')
            
            # This check is necessary because the columns could be named differently
            if not all(col in day_data.columns for col in [hout_col, tout_col]):
                return analysis_result # Return the original analysis if data is missing

            # Find the period of the day with the driest outside air (e.g., bottom 25th percentile)
            driest_period_data = day_data[day_data[hout_col] < day_data[hout_col].quantile(0.25)]
            
            if not driest_period_data.empty:
                # What was the average outside temperature during this driest period?
                avg_tout_during_driest_period = driest_period_data[tout_col].mean()
                
                # If it was very hot outside during the driest time...
                if avg_tout_during_driest_period > 28: # A high temperature threshold
                    # We have found the trade-off!
                    tradeoff_explanation = (
                        "The weak correlation is not a sign of a poor strategy, but a **deliberate and intelligent trade-off**. "
                        f"During the driest part of the day, the outside temperature was extremely high (averaging {avg_tout_during_driest_period:.1f}°C). "
                        "The controller correctly prioritized **temperature stability** over opportunistic dehumidification, avoiding the massive heat load that opening the vents would have introduced. "
                        "This proves the controller successfully balanced conflicting objectives."
                    )
                    
                    # Override the generic conclusion with this new, expert insight
                    analysis_result['conclusion'] = "The controller was **not** effectively using periods of dry outside air, but this was an intentional and intelligent trade-off to prioritize temperature control."
                    analysis_result['evidence']['Overriding Factor (The Trade-off)'] = tradeoff_explanation

        return analysis_result


    def analyze_oscillation(self, variable_node: str, start_dt: datetime, end_dt: datetime) -> Dict[str, Any]:
        """
        Assess how volatile a variable was and rank its physical drivers by volatility.

        The target is flagged as volatile when its standard deviation exceeds 30%
        of its mean absolute value over the window. For a volatile target, the
        'Flux' and 'Process' predecessors of its base state node in ``self.G`` are
        ranked by their own standard deviation; the two most volatile are labelled
        "Primary Volatile Driver" and the remainder "Contributing Factor".

        Args:
            variable_node: Graph node to analyze. The text before the first '_'
                is used as the base state node for the driver lookup.
            start_dt: Start of the window, used to slice ``self.data`` with ``.loc``.
            end_dt: End of the window.

        Returns:
            ``{'error': ...}`` when the window is empty, cannot be sliced, the
            column is unknown, or fewer than two samples remain. Otherwise a dict
            with 'target_variable', 'contributing_factors' and 'conclusion'.
        """
        print(f"DEBUG - Analyzing oscillation for '{variable_node}' from {start_dt} to {end_dt}.")

        try:
            window_data = self.data.loc[start_dt:end_dt]
            if window_data.empty:
                return {'error': "No data for the specified time range."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        target_col = self.node_to_column.get(variable_node)
        if not target_col or target_col not in window_data.columns:
            return {'error': f"Data column for '{variable_node}' not found."}

        target_series = window_data[target_col].dropna()
        if len(target_series) < 2:
            return {'error': "Not enough data to analyze oscillation."}

        target_volatility = target_series.std()
        target_mean_abs = target_series.abs().mean()
        # Relative spread (std / mean |x|); the 1e-9 guard avoids dividing by a
        # near-zero mean. Coerced to a native bool so the result serializes cleanly.
        is_volatile = bool(target_mean_abs > 1e-9 and target_volatility / target_mean_abs > 0.3)

        analysis_result = {
            'target_variable': {
                'name': self.node_to_name.get(variable_node, variable_node),
                'volatility_score': target_volatility,
                'is_volatile': is_volatile
            },
            'contributing_factors': []
        }

        if not is_volatile:
            analysis_result['conclusion'] = f"The variable '{analysis_result['target_variable']['name']}' was relatively stable during this period."
            return analysis_result

        # Drivers are attached to the base state node (e.g. 'T' for 'T_ref').
        base_state_node = variable_node.split('_')[0]

        physical_drivers = []
        if self.G.has_node(base_state_node):
            physical_drivers = [
                predecessor
                for predecessor in self.G.predecessors(base_state_node)
                if self.G.nodes[predecessor].get('type') in ('Flux', 'Process')
            ]

        driver_analysis_list = []
        for driver_node in physical_drivers:
            driver_col = self.node_to_column.get(driver_node)
            if not driver_col or driver_col not in window_data.columns:
                continue

            driver_series = window_data[driver_col].dropna()
            if len(driver_series) < 2:
                continue

            # First 'Disturbance' predecessor of the driver, if any.
            disturbance = next(
                (pred for pred in self.G.predecessors(driver_node)
                 if self.G.nodes[pred].get('type') == 'Disturbance'),
                None,
            )
            ultimate_cause_info = ""
            if disturbance is not None:
                # Fall back to the node id so an unmapped node never renders as 'None'.
                disturbance_name = self.node_to_name.get(disturbance, disturbance)
                ultimate_cause_info = f"This is driven by the external disturbance '{disturbance_name}'."

            driver_analysis_list.append({
                'name': self.node_to_name.get(driver_node, driver_node),
                'volatility_score': driver_series.std(),
                'explanation': ultimate_cause_info
            })

        # Rank drivers from most to least volatile; the top two are the primary ones.
        sorted_drivers = sorted(driver_analysis_list, key=lambda x: x['volatility_score'], reverse=True)
        for rank, driver in enumerate(sorted_drivers):
            driver['role'] = "Primary Volatile Driver" if rank < 2 else "Contributing Factor"
        analysis_result['contributing_factors'] = sorted_drivers

        analysis_result['conclusion'] = "The high volatility in the controller's state is a direct reflection of the unstable physical environment it was managing."
        return analysis_result


    def evaluate_control_strategy(self, control_node: str, target_nodes: List[str], start_dt: datetime, end_dt: datetime) -> Dict[str, Any]:
        """
        Summarize how a control action related to its target variables over a window.

        The control signal is classified as "intermittent bursts" when its peak
        exceeds 0.01 and its mean is below a quarter of that peak, otherwise as
        "a sustained action". The first target's start-to-end change is reported
        as the primary effect. If a second target is given, its change is compared
        against ``self.constraint_tolerance`` (default 0.5) as a side-effect.

        Args:
            control_node: Graph node of the control input.
            target_nodes: Target state nodes; index 0 is the primary target and
                index 1, if present, is the side-effect target.
            start_dt: Start of the window, used to slice ``self.data`` with ``.loc``.
            end_dt: End of the window.

        Returns:
            ``{'error': ...}`` when data or columns are missing, otherwise a dict
            with 'conclusion' and 'evidence'.
        """
        print(f"DEBUG - Evaluating strategy for '{control_node}' targeting {target_nodes}")

        try:
            frame = self.data.loc[start_dt:end_dt]
            if frame.empty:
                return {'error': "No data in the specified time range."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        available = frame.columns

        # --- Control signal characterization ---
        control_col = self.node_to_column.get(control_node)
        if not (control_col and control_col in available):
            return {'error': f"Control action data for '{control_node}' not found."}
        control_label = self.node_to_name.get(control_node, control_node)

        control_series = frame[control_col]
        mean_level = control_series.mean()
        peak_level = control_series.max()
        if peak_level > 0.01 and mean_level < peak_level * 0.25:
            action_pattern = "intermittent bursts"
        else:
            action_pattern = "a sustained action"

        # --- Primary target ---
        if not target_nodes:
            return {'error': "No target state variables were provided for evaluation."}

        primary_node = target_nodes[0]
        primary_col = self.node_to_column.get(primary_node)
        if not (primary_col and primary_col in available):
            return {'error': f"Data for primary target '{primary_node}' not found."}

        primary_label = self.node_to_name.get(primary_node, primary_node)
        primary_values = frame[primary_col]
        first_value = primary_values.iloc[0]
        last_value = primary_values.iloc[-1]
        delta = last_value - first_value
        unit = self.constraint_limits.get(primary_node, {}).get('unit', '')
        effectiveness = f"The strategy was effective. The {action_pattern} of {control_label} successfully influenced the primary target, causing **{primary_label}** to change by **{delta:+.2f}{unit}** (from {first_value:.2f} to {last_value:.2f})."

        # --- Optional side-effect on the second target ---
        side_effect_note = ""
        if len(target_nodes) > 1:
            side_node = target_nodes[1]
            side_col = self.node_to_column.get(side_node)
            side_lg_node = self.lagrangian_nodes.get(side_node, {}).get('ieq')
            side_lg_col = self.node_to_column.get(side_lg_node) if side_lg_node else None

            have_side_data = (
                side_col
                and side_lg_col
                and all(col in available for col in (side_col, side_lg_col))
            )
            if not have_side_data:
                side_effect_note = "Could not analyze side-effects due to missing data."
            else:
                side_label = self.node_to_name.get(side_node, side_node)
                side_delta = frame[side_col].iloc[-1] - frame[side_col].iloc[0]
                mean_side_lg = frame[side_lg_col].mean()
                tolerance = self.constraint_tolerance.get(side_node, 0.5)

                if abs(side_delta) < tolerance:
                    side_effect_note = f"Crucially, this was achieved while carefully managing the impact on **{side_label}**, which remained stable. The active Lagrangian for this variable (avg: {mean_side_lg:.2E}) provides proof that the controller was successfully preventing negative side-effects."
                else:
                    side_effect_note = f"This had a noticeable side-effect on **{side_label}**, causing it to change by {side_delta:+.2f}."

        evidence = {"Effectiveness on Primary Goal": effectiveness}
        if side_effect_note:
            evidence["Side-Effect Management"] = side_effect_note

        return {
            'conclusion': f"The strategy of using {action_pattern} of {control_label} was effective and demonstrated a sophisticated balance of objectives.",
            'evidence': evidence,
        }

    def analyze_trade_off(self, var_nodes: List[str], start_dt: datetime, end_dt: datetime) -> Dict[str, Any]:
        """
        Dispatch a trade-off question to the matching analysis.

        Two scenarios are handled:
        1. Evaluation: 'B' (the objective node) is among ``var_nodes``; the first
           node that is not 'B' is evaluated against it.
        2. Prioritization: otherwise the first two nodes are compared with each other.

        Args:
            var_nodes: Graph nodes involved in the trade-off. Evaluation needs one
                node besides 'B'; prioritization needs at least two nodes.
            start_dt: Start of the analysis window.
            end_dt: End of the analysis window.

        Returns:
            The result dict of the delegated analysis, or ``{'error': ...}`` when
            ``var_nodes`` does not hold enough nodes for the detected scenario.
        """
        print(f"DEBUG - Analyzing trade-off between {var_nodes} from {start_dt} to {end_dt}")

        # --- SCENARIO DETECTION ---
        if 'B' in var_nodes:
            objective_node = 'B'
            # The state being traded against the objective is the first non-'B' node.
            state_node = next((node for node in var_nodes if node != 'B'), None)
            if not state_node:
                return {'error': "Could not identify the state variable for the trade-off evaluation."}

            print(f"DEBUG - Scenario: EVALUATION (State: {state_node}, Objective: {objective_node})")
            return self._evaluate_biomass_trade_off(state_node, objective_node, start_dt, end_dt)

        # Prioritization compares two nodes; guard against short input instead of
        # failing with an IndexError on var_nodes[1].
        if len(var_nodes) < 2:
            return {'error': "Two variables are required to analyze a prioritization trade-off."}

        first_node, second_node = var_nodes[0], var_nodes[1]
        print(f"DEBUG - Scenario: PRIORITIZATION ({first_node} vs {second_node})")
        return self._compare_state_priorities(first_node, second_node, start_dt, end_dt)

    def _evaluate_biomass_trade_off(self, state_node: str, objective_node: str, start_dt: datetime, end_dt: datetime) -> Dict[str, Any]:
        """
        Evaluate whether constraining a state was an acceptable trade-off for the objective.

        Three conditions are checked over the window and must all hold for a
        positive verdict:
        1. the mean of the objective's 'ieq' Lagrangian column is below -1e-9;
        2. the maximum of the state's 'ieq' Lagrangian column exceeds its
           configured activation threshold (default 1e-7);
        3. the mean of the 'Qrad' column exceeds
           ``self.reasoning_thresholds['Qrad_photosynthesis']``.

        Args:
            state_node: Graph node of the constrained state variable.
            objective_node: Graph node of the objective (passed as 'B' by the caller).
            start_dt: Start of the window, used to slice ``self.data`` with ``.loc``.
            end_dt: End of the window.

        Returns:
            ``{'error': ...}`` when data or a required column is missing,
            otherwise a dict with 'conclusion' and 'evidence'.
        """
        analysis_result = {'conclusion': "Could not determine if the trade-off was acceptable.", 'evidence': {}}
        try:
            window_data = self.data.loc[start_dt:end_dt]
            if window_data.empty:
                return {'error': "No data available for the specified period."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        # --- 1. Check Goal: is the objective's multiplier negative on average? ---
        obj_lg_node = self.lagrangian_nodes.get(objective_node, {}).get('ieq')
        obj_lg_col = self.node_to_column.get(obj_lg_node)

        if not obj_lg_col or obj_lg_col not in window_data.columns:
            return {'error': f"Could not find biomass objective data column ('{obj_lg_col}') for analysis."}

        avg_obj_lg = window_data[obj_lg_col].mean()
        is_maximizing_objective = avg_obj_lg < -1e-9

        # --- 2. Check "Cost": did the state's multiplier exceed its activation threshold? ---
        state_lg_node = self.lagrangian_nodes.get(state_node, {}).get('ieq')
        state_lg_col = self.node_to_column.get(state_lg_node)

        if not state_lg_col or state_lg_col not in window_data.columns:
            return {'error': f"Could not find state constraint data column ('{state_lg_col}') for analysis."}

        max_state_lg = window_data[state_lg_col].max()
        is_constraint_active = max_state_lg > self.config['lagrangian_active_threshold'].get(state_lg_node, 1e-7)

        # --- 3. Check Opportunity: was mean radiation above the configured threshold? ---
        qrad_col = self.node_to_column.get('Qrad')
        if not qrad_col or qrad_col not in window_data.columns:
            return {'error': "Could not find solar radiation data column for analysis."}

        avg_qrad = window_data[qrad_col].mean()
        is_condition_favorable = avg_qrad > self.reasoning_thresholds['Qrad_photosynthesis']

        # --- 4. Synthesize the Conclusion ---
        if is_maximizing_objective and is_constraint_active and is_condition_favorable:
            # Fall back to the node id so an unmapped node never renders as 'None'.
            state_name = self.node_to_name.get(state_node, state_node)
            analysis_result['conclusion'] = "Yes, this was an acceptable and deliberate trade-off, representing an aggressive profit-maximization strategy."
            analysis_result['evidence'] = {
                "Controller's Primary Goal": f"The Biomass Lagrangian was strongly negative (avg: {avg_obj_lg:.2E}), proving the main objective was to maximize crop growth.",
                "Favorable Conditions": f"This was viable due to peak solar radiation (avg: {avg_qrad:.1f} W/m²), creating a prime opportunity for photosynthesis.",
                "The Necessary 'Cost'": f"To fuel this growth, the controller pushed {state_name} to its upper safety limit, causing its constraint to become active (max Lagrangian: {max_state_lg:.2E}). This shows the system was operating at its maximum productive capacity."
            }
        else:
            analysis_result['conclusion'] = "The evidence does not clearly support this being an acceptable trade-off."
            analysis_result['evidence']['Reason'] = "The controller was not aggressively pursuing biomass maximization during this period, or conditions were not favorable."

        return analysis_result
    
    def analyze_setpoint_anomaly_PATTERN(self, setpoint_node: str, date: date, start_time: time, end_time: time) -> Dict[str, Any]:
        """
        Explain an unusual setpoint pattern over a time range on a given day.

        Only temperature setpoints (node name reducing to 'T' once '_ref' is
        removed) are handled. The setpoint is correlated with the 'Tout' column
        and the mean of the 'T_ieq' column is inspected. A correlation above 0.7
        together with a mean below -1e-10 is reported as "Dynamic Energy
        Conservation".

        Args:
            setpoint_node: Graph node of the setpoint, e.g. 'T_ref'.
            date: Calendar day of the window.
            start_time: Time of day at which the window starts.
            end_time: Time of day at which the window ends.

        Returns:
            A dict with 'strategy_name', 'conclusion' and 'evidence' when the
            signature matches, otherwise ``{'error': ...}``.
        """
        from datetime import datetime
        print(f"DEBUG (Setpoint Anomaly Pattern Expert): Analyzing {setpoint_node} from {start_time} to {end_time}.")

        try:
            window_start = datetime.combine(date, start_time)
            window_end = datetime.combine(date, end_time)
            window_data = self.data.loc[window_start:window_end]
        except Exception as e:
            return {'error': f"Could not retrieve data for analysis: {e}"}

        no_match = {'error': "Could not identify a clear, correlated cause for the anomalous pattern."}

        # Only the temperature setpoint has a known disturbance signature here.
        if setpoint_node.replace('_ref', '') != 'T':
            return no_match

        setpoint_col = self.node_to_column.get(setpoint_node)
        disturbance_col = self.node_to_column.get('Tout')
        optim_state_col = self.node_to_column.get('T_ieq')

        required_cols = (setpoint_col, disturbance_col, optim_state_col)
        if any(col not in window_data.columns for col in required_cols):
            return {'error': "Missing necessary data columns for this analysis."}

        correlation = window_data[setpoint_col].corr(window_data[disturbance_col])
        avg_optim_state = window_data[optim_state_col].mean()

        # Signature: setpoint tracks the outside temperature while the mean
        # temperature multiplier is negative.
        tracks_disturbance = correlation is not None and correlation > 0.7
        if not (tracks_disturbance and avg_optim_state < -1e-10):
            return no_match

        evidence = {
            'External Influence': f"The setpoint pattern closely mirrored the outside temperature during this period (correlation: {correlation:.2f}).",
            "Optimizer's Goal": f"The Temperature Lagrangian was consistently negative (avg: {avg_optim_state:.2E}), proving the controller's primary goal was to prevent the temperature from dropping too low while minimizing heating costs.",
            'Conclusion': "Instead of burning excess energy to maintain a fixed target against the cold snap, the controller dynamically lowered its own setpoint to a still-safe level. This demonstrates an advanced, cost-effective optimization strategy."
        }
        return {
            'strategy_name': "Dynamic Energy Conservation",
            'conclusion': "The unusual double-dip in the temperature setpoint was a sophisticated energy-saving maneuver in response to an unexpected drop in the outside temperature.",
            'evidence': evidence,
        }

    def analyze_setpoint_anomaly_strategy(self, setpoint_node: str, timestamp: datetime) -> Dict[str, Any]:
        """
        Explain an unusual setpoint value at one point in time via known strategies.

        The only scenario currently recognized is night-time energy saving for
        the temperature setpoint: the hour is before 7 or after 20, and the
        setpoint is below 18, 'Tout' is below 18 and 'Qrad' is below 10 in the
        row nearest to ``timestamp``.

        Args:
            setpoint_node: Graph node of the setpoint, e.g. 'T_ref'.
            timestamp: Moment to inspect; the row is looked up through
                ``self.find_nearest_timestamp``.

        Returns:
            A dict with 'strategy_name', 'conclusion' and 'evidence' when the
            scenario matches, otherwise ``{'error': ...}``.
        """
        print(f"DEBUG (Setpoint Anomaly Expert): Evaluating {setpoint_node} at {timestamp}.")

        state_name = setpoint_node.replace('_ref', '')

        try:
            nearest = self.find_nearest_timestamp(timestamp)
            data_row = self.data.loc[nearest]
        except Exception as e:
            return {'error': f"Could not retrieve data for analysis: {e}"}

        # Evidence values; a missing column mapping yields None from .get().
        setpoint_val = data_row.get(self.node_to_column.get(setpoint_node))
        tout_val = data_row.get(self.node_to_column.get('Tout'))
        qrad_val = data_row.get(self.node_to_column.get('Qrad'))

        unexplained = {
            'error': "Could not identify a specific strategic reason for the setpoint value at this time."
        }

        # --- SCENARIO: night-time energy saving (low temperature setpoint) ---
        in_night_hours = timestamp.hour < 7 or timestamp.hour > 20
        if not (state_name == 'T' and in_night_hours):
            return unexplained

        is_setpoint_low = pd.notna(setpoint_val) and setpoint_val < 18
        is_cold_outside = pd.notna(tout_val) and tout_val < 18
        is_dark = pd.notna(qrad_val) and qrad_val < 10

        print(f"DEBUG (Anomaly - Energy Save Check): Setpoint Low? {is_setpoint_low} (val: {setpoint_val}), Cold Outside? {is_cold_outside} (val: {tout_val}), Dark? {is_dark} (val: {qrad_val})")

        if not (is_setpoint_low and is_cold_outside and is_dark):
            return unexplained

        evidence = {
            "Economic Rationale": "The controller intentionally lowered the temperature setpoint to a safe minimum to avoid burning expensive energy for heating when there is no sunlight available for plant growth.",
            "Environmental Context": f"This was a sensible strategy given the cold outside temperature of {tout_val:.1f}°C.",
            "Conclusion": "This demonstrates an advanced, cost-effective optimization strategy that prioritizes energy conservation during non-productive hours."
        }
        return {
            'strategy_name': "Proactive Energy Conservation",
            'conclusion': "This was a deliberate energy-saving strategy for the cold, dark, early morning hours.",
            'evidence': evidence,
        }

    def _compare_state_priorities(self, var1_node: str, var2_node: str, start_dt: datetime, end_dt: datetime) -> Dict[str, Any]:
        """
        Compare the optimizer's effort on two variables to infer which had priority.

        Effort is the mean absolute value of each variable's 'ieq' Lagrangian
        column over the window; a variable with no such column gets an effort of
        0. A variable is reported as prioritized when its effort is more than ten
        times the other's.

        Args:
            var1_node: Graph node of the first variable.
            var2_node: Graph node of the second variable.
            start_dt: Start of the window, used to slice ``self.data`` with ``.loc``.
            end_dt: End of the window.

        Returns:
            ``{'error': ...}`` when the window is empty or cannot be sliced,
            otherwise a dict with 'var1', 'var2' (each holding 'name' and
            'avg_abs_lg'), 'conclusion' and 'evidence'.
        """
        try:
            window_data = self.data.loc[start_dt:end_dt]
            # An empty window would yield NaN efforts and a misleading verdict.
            if window_data.empty:
                return {'error': "No data available for the specified period."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        def _avg_abs_multiplier(node):
            """Return the mean |'ieq' multiplier| for node, or 0 when it has no usable column."""
            # Tolerate entries that are missing or lack an 'ieq' key.
            lg_node = (self.lagrangian_nodes.get(node) or {}).get('ieq')
            lg_col = self.node_to_column.get(lg_node) if lg_node else None
            if lg_col and lg_col in window_data.columns:
                return window_data[lg_col].abs().mean()
            return 0

        def _dominance_verdict(hi_name, hi_effort, lo_name, lo_effort):
            """Build the conclusion and evidence when hi_name's effort dominates lo_name's."""
            if lo_effort == 0:
                return (
                    f"**{hi_name}** has active constraints while **{lo_name}** does not have Lagrangian multipliers.",
                    {
                        f"Evidence for {hi_name} Activity": f"The average magnitude of its Lagrangian multiplier was {hi_effort:.2E}, indicating active optimization constraints.",
                        f"Evidence for {lo_name}": f"{lo_name} does not have Lagrangian multipliers (likely a reference setpoint or unconstrained variable)."
                    },
                )
            return (
                f"The controller was prioritizing the **{hi_name}** setpoint.",
                {
                    f"Evidence for {hi_name} Priority": f"The average magnitude of its Lagrangian multiplier was significantly higher ({hi_effort:.2E}), indicating strong effort from the optimizer.",
                    f"Evidence for {lo_name} Stability": f"Conversely, the Lagrangian for {lo_name} was near-zero ({lo_effort:.2E}), showing it was not a limiting factor."
                },
            )

        name1 = self.node_to_name.get(var1_node, var1_node)
        name2 = self.node_to_name.get(var2_node, var2_node)
        var1_effort = _avg_abs_multiplier(var1_node)
        var2_effort = _avg_abs_multiplier(var2_node)

        # A tenfold gap in effort is treated as a clear priority.
        if var1_effort > var2_effort * 10:
            conclusion, evidence = _dominance_verdict(name1, var1_effort, name2, var2_effort)
        elif var2_effort > var1_effort * 10:
            conclusion, evidence = _dominance_verdict(name2, var2_effort, name1, var1_effort)
        elif var1_effort == 0 and var2_effort == 0:
            conclusion = f"Neither **{name1}** nor **{name2}** have active optimization constraints."
            evidence = {
                "Evidence": "Both variables lack Lagrangian multipliers, suggesting they are reference setpoints or unconstrained variables."
            }
        else:
            conclusion = "The controller was balancing both setpoints without a strong priority for either."
            evidence = {
                "Evidence": f"The optimizer's effort, measured by the Lagrangian multipliers, was comparable for both {name1} ({var1_effort:.2E}) and {name2} ({var2_effort:.2E})."
            }

        return {
            'var1': {'name': name1, 'avg_abs_lg': var1_effort},
            'var2': {'name': name2, 'avg_abs_lg': var2_effort},
            'conclusion': conclusion,
            'evidence': evidence,
        }
        
    def analyze_humidity_strategy(self, date: datetime.date) -> Dict[str, Any]:
        """
        Summarize the humidity strategy for one day, split into day and night.

        Rows of the given date are split on the 'Qrad' column: values above 50
        count as daytime, values of 50 or below as night-time. For each period
        the median of the 'H_ref' column and the means of the 'H_trans',
        'H_cov' and 'H_cool' columns are reported.

        Args:
            date: Calendar day to analyze, matched against ``self.data.index.date``.

        Returns:
            ``{'day_strategy': ..., 'night_strategy': ...}`` on success, or
            ``{'error': ...}`` when the day has no data, lacks one of the two
            periods, or any lookup fails.
        """
        print(f"DEBUG - Synthesizing full-day humidity strategy for {date}.")

        # (result key, graph node) pairs averaged for each period, in output order.
        averaged_fields = (
            ('avg_transpiration', 'H_trans'),
            ('avg_condensation', 'H_cov'),
            ('avg_cooling_dehumidification', 'H_cool'),
        )

        def _summarize_period(label, frame):
            """Build the strategy summary dict for one period's rows."""
            summary = {
                'period': label,
                'humidity_setpoint': frame[self.node_to_column['H_ref']].median(),
            }
            for result_key, node in averaged_fields:
                summary[result_key] = frame[self.node_to_column[node]].mean()
            return summary

        try:
            on_requested_day = self.data.index.date == date
            day_data = self.data[on_requested_day]
            if day_data.empty:
                return {'error': "No data available for the specified date."}

            # Day vs. night is decided by solar radiation, not by clock time.
            qrad_col = self.node_to_column.get('Qrad')
            lit_rows = day_data[day_data[qrad_col] > 50]
            dark_rows = day_data[day_data[qrad_col] <= 50]

            if lit_rows.empty or dark_rows.empty:
                return {'error': "Data for this day does not contain a full day/night cycle."}

            day_strategy = _summarize_period('Daytime', lit_rows)
            night_strategy = _summarize_period('Nighttime', dark_rows)
            return {'day_strategy': day_strategy, 'night_strategy': night_strategy}

        except Exception as e:
            return {'error': f"An error occurred during strategy synthesis: {e}"}

    def analyze_disease_risk(self, date: datetime.date) -> Dict[str, Any]:
        """
        Assess disease risk for one day and check for an evening mitigation.

        Two things are evaluated on the rows of ``self.data`` whose index date
        equals ``date``:

        1. Risk: how long the 'H' column stayed above 85 %. The duration is
           derived from the row count assuming 5-minute sampling. The level is
           "High" when at least one reading exceeded the threshold and "Low"
           when none did (previously it was reported as "High" even for
           0.0 hours of exposure).
        2. Mitigation: within 18:00-23:59, whether the mean 'H_ref' setpoint
           was below 75 %. If so, the mean 'H_cov' flux is compared against the
           absolute mean 'H_vent' flux to name the dominant mechanism.

        Args:
            date: The calendar day to analyse.

        Returns:
            ``{'risk_assessment': {...}, 'mitigation_evaluation': {...}}`` on
            success, or ``{'error': <message>}`` when the day has no rows or the
            humidity column is unavailable.
        """
        day_data = self.data[self.data.index.date == date]
        if day_data.empty:
            return {'error': 'No data for this day.'}

        # --- MODULE 1: Risk assessment from high-humidity exposure ---
        hum_col = self.node_to_column.get('H')
        if hum_col is None or hum_col not in day_data.columns:
            # Report the problem the same way the other analysers do instead of
            # letting a KeyError escape to the caller.
            return {'error': 'Humidity data is not available for disease risk analysis.'}

        risk_threshold = 85.0
        high_hum_data = day_data[day_data[hum_col] > risk_threshold]
        duration_hours = len(high_hum_data) * 5 / 60  # 5-minute intervals

        if duration_hours > 0:
            risk_assessment = {
                'daytime_risk_factor': f"During the day, conditions were conducive to disease, with humidity exceeding {risk_threshold}% for approximately {duration_hours:.1f} hours.",
                'risk_level': "High"
            }
        else:
            # No reading crossed the threshold, so claiming a high risk would
            # contradict the reported exposure time.
            risk_assessment = {
                'daytime_risk_factor': f"During the day, humidity did not exceed {risk_threshold}% at any point, so conditions were not conducive to disease.",
                'risk_level': "Low"
            }

        # --- MODULE 2: Nighttime mitigation strategy evaluation ---
        night_data = day_data.between_time('18:00', '23:59')
        mitigation_evaluation = {
            'mitigation_detected': False,
            'mitigation_strategy': "No clear nighttime mitigation strategy was detected."
        }

        setpoint_col = self.node_to_column.get('H_ref')
        if not night_data.empty and setpoint_col in night_data.columns:
            night_setpoint = night_data[setpoint_col].mean()
            # A lowered evening setpoint is taken as the sign of mitigation.
            # A NaN mean compares False and therefore counts as "not detected".
            if night_setpoint < 75.0:
                mitigation_evaluation['mitigation_detected'] = True
                mitigation_evaluation['mitigation_strategy'] = (
                    f"However, the controller correctly identified this risk and initiated a critical "
                    f"nighttime disease prevention strategy. It forcefully lowered the humidity setpoint to around {night_setpoint:.0f}% after sunset."
                )

                # Name the mechanism only when both flux columns are present;
                # the formatter already copes with a missing mechanism entry.
                h_cov_col = self.node_to_column.get('H_cov')
                h_vent_col = self.node_to_column.get('H_vent')
                if h_cov_col in night_data.columns and h_vent_col in night_data.columns:
                    avg_cov_flux = night_data[h_cov_col].mean()
                    avg_vent_flux = night_data[h_vent_col].mean()

                    if avg_cov_flux > abs(avg_vent_flux):
                        mitigation_evaluation['mitigation_mechanism'] = "This was achieved primarily through passive condensation on the cooler greenhouse cover, which is a highly energy-efficient method."
                    else:
                        mitigation_evaluation['mitigation_mechanism'] = "This was achieved using active ventilation to purge the humid air."

        return {
            'risk_assessment': risk_assessment,
            'mitigation_evaluation': mitigation_evaluation
        }

    def format_oscillation_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Render an oscillation analysis as newline-separated text for the LLM.

        Args:
            analysis_result: Either a dict with an 'error' entry, or a dict
                holding 'target_variable' (with 'name', 'is_volatile' and
                'volatility_score'), 'conclusion', and optionally
                'contributing_factors'.

        Returns:
            The error message when 'error' is present; otherwise a report with
            the target's volatility, every factor whose role is
            'Primary Volatile Driver', and the overall conclusion.
        """
        if 'error' in analysis_result:
            return f"Error during analysis: {analysis_result['error']}"

        target = analysis_result['target_variable']
        lines = ["--- Analysis of High-Frequency Oscillation ---"]
        lines.append(f"Target Variable: {target['name']}")
        lines.append(f"Volatility Status: {'Highly Volatile' if target['is_volatile'] else 'Stable'}")
        lines.append(f"Volatility Score (Std Dev): {target['volatility_score']:.4f}")
        lines.append("\n--- Analysis of Contributing Physical Forces ---")

        # Only factors flagged as primary volatile drivers are listed.
        drivers = []
        for candidate in analysis_result.get('contributing_factors', []):
            if candidate.get('role') == 'Primary Volatile Driver':
                drivers.append(candidate)

        if drivers:
            lines.append("\n--- Primary Drivers of Instability ---")
            for driver in drivers:
                lines.append(f"- Factor Name: {driver['name']}")
                root_cause = driver.get('explanation')
                if root_cause:
                    lines.append(f"  - Root Cause: {root_cause}")

        lines.append(f"\nOverall Conclusion: {analysis_result['conclusion']}")
        return "\n".join(lines)

    def format_disease_risk_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Render a disease-risk analysis as newline-separated text for the LLM.

        Args:
            analysis_result: Either a dict with an 'error' entry, or a dict with
                optional 'risk_assessment' and 'mitigation_evaluation' entries.

        Returns:
            The error message when 'error' is present; otherwise a titled report
            with the risk section (when available) followed by the nighttime
            mitigation section, which is worded differently depending on
            whether a mitigation was detected.
        """
        if 'error' in analysis_result:
            return f"Error during disease risk analysis: {analysis_result['error']}"

        risk = analysis_result.get('risk_assessment', {})
        mitigation = analysis_result.get('mitigation_evaluation', {})

        report = ["--- Strategic Disease Risk & Mitigation Analysis ---"]

        # Risk section: omitted entirely when no assessment was supplied.
        if risk:
            report.extend([
                f"\n**Daytime Risk Factor:** {risk.get('risk_level', 'Unknown')}",
                f"- {risk.get('daytime_risk_factor', 'Could not be determined.')}",
            ])

        # Mitigation section: always present, in one of two wordings.
        if not (mitigation and mitigation.get('mitigation_detected')):
            report.append(f"\n**Nighttime Mitigation Strategy:** Not Detected")
            report.append(f"- {mitigation.get('mitigation_strategy', 'The controller did not appear to take specific action to lower humidity at night.')}")
            return "\n".join(report)

        report.append(f"\n**Nighttime Mitigation Strategy:** Detected and Effective")
        report.append(f"- **Controller Action:** {mitigation.get('mitigation_strategy', 'No specific action noted.')}")
        mechanism = mitigation.get('mitigation_mechanism')
        if mechanism:
            report.append(f"- **Physical Mechanism:** {mechanism}")

        return "\n".join(report)

    def format_strategy_for_llm(self, analysis: Dict[str, Any]) -> str:
        """
        Render the day/night humidity strategy as newline-separated text.

        Args:
            analysis: Either a dict with an 'error' entry, or a dict with
                'day_strategy' and 'night_strategy' entries carrying the
                setpoint and flux figures referenced below.

        Returns:
            The error message when 'error' is present; otherwise a report with
            a daytime section followed by a nighttime section. The narrative
            wording is fixed; only the numbers come from ``analysis``.
        """
        if 'error' in analysis:
            return f"Error during strategy analysis: {analysis['error']}"

        day, night = analysis['day_strategy'], analysis['night_strategy']

        daytime_section = [
            "\n**Daytime Strategy:**",
            f"- The controller maintains a **high humidity setpoint of ~{day['humidity_setpoint']:.0f}%**.",
            f"- **Primary Moisture Source:** Plants actively add a large amount of moisture to the air through transpiration (avg flux: {day['avg_transpiration']:.4f}).",
            f"- **Primary Moisture Removal:** The active cooling system provides significant dehumidification as a side effect (avg flux: {day['avg_cooling_dehumidification']:.4f}).",
            f"- **Conclusion:** The strategy is to allow high humidity, which is beneficial for growth, while relying on the necessary daytime cooling to prevent it from getting excessive.",
        ]

        nighttime_section = [
            "\n**Nighttime Strategy:**",
            f"- The controller enforces a **low humidity setpoint of ~{night['humidity_setpoint']:.0f}%**.",
            f"- **Primary Moisture Source:** Plant transpiration nearly ceases (avg flux: {night['avg_transpiration']:.4f}).",
            f"- **Primary Moisture Removal:** As the greenhouse cools, moisture naturally condenses on the cover, removing it from the air (avg flux: {night['avg_condensation']:.4f}).",
            f"- **Conclusion:** The strategy is to lower humidity to prevent condensation on the plants and reduce the risk of fungal diseases, which is a major concern in cooler, dark conditions.",
        ]

        title = "--- Humidity Management Strategy Analysis ---"
        return "\n".join([title, *daytime_section, *nighttime_section])
    
    def analyze_setpoint_anomaly(self, setpoint_node: str, date: datetime.date, start_time: time, end_time: time) -> Dict[str, Any]:
        """
        Explain an unusual setpoint pattern within a time window of one day.

        The setpoint is paired with a disturbance variable ('T_ref' -> 'Tout',
        'H_ref' -> 'Hout', 'C_ref' -> 'Qrad'). Over the window the method
        computes the setpoint/disturbance correlation and the mean of the
        optimizer column registered under ``self.lagrangian_nodes[<state>]['ieq']``,
        then tests a series of fixed signatures:

        * Night-time temperature (window start before 06:00 or after 21:59):
          setpoint below 18, outside temperature below 15 and radiation below
          10, all read from the row nearest the window start.
        * 'T_ref': correlation above 0.7 and a negative mean optimizer value.
        * 'H_ref': correlation above 0.7 and a positive mean optimizer value.
        * 'C_ref': correlation above 0.7 and a negative mean optimizer value.

        Fixes relative to the previous version:
        ``timestamp`` is now defined before it is used to fetch the reference
        row (it used to raise inside the ``try`` and make every call return the
        retrieval error); the outside-temperature and radiation columns are
        actually looked up; and a night-time temperature query that does not
        match the energy-saving signature now continues to the 'T_ref'
        correlation check instead of skipping it.

        Args:
            setpoint_node: Setpoint node code, e.g. 'T_ref', 'H_ref' or 'C_ref'.
            date: Day of the window.
            start_time: Window start (inclusive).
            end_time: Window end (inclusive).

        Returns:
            ``{'explanation': str, 'evidence': {...}}`` when a signature
            matches, otherwise ``{'error': <message>}``.
        """
        # Local import guarantees ``datetime`` is the class here, whatever the
        # module-level name is bound to.
        from datetime import datetime
        print(f"DEBUG - Analyzing anomaly for setpoint '{setpoint_node}' from {start_time} to {end_time}.")

        try:
            start_dt = datetime.combine(date, start_time)
            end_dt = datetime.combine(date, end_time)
            # Point-in-time checks are made on the reading nearest the window start.
            timestamp = start_dt
            window_data = self.data.loc[start_dt:end_dt]
            # NOTE(review): find_nearest_timestamp is assumed to return a single
            # index label of self.data — confirm against its definition.
            data_row = self.data.loc[self.find_nearest_timestamp(timestamp)]
        except Exception as e:
            return {'error': f"Could not retrieve data for anomaly analysis: {e}"}

        # --- 1. DYNAMIC VARIABLE MAPPING ---
        # Map setpoints to their most likely disturbance variable.
        setpoint_to_disturbance_map = {
            'T_ref': 'Tout',  # Temperature is mainly disturbed by outside temperature.
            'H_ref': 'Hout',  # Humidity is mainly disturbed by outside humidity.
            'C_ref': 'Qrad'   # CO2 setpoint strategy is often linked to available light for photosynthesis.
        }

        base_state_node = setpoint_node.replace('_ref', '')
        disturbance_node = setpoint_to_disturbance_map.get(setpoint_node)

        if not disturbance_node:
            return {'error': f"No anomaly analysis strategy defined for setpoint '{setpoint_node}'."}

        setpoint_col = self.node_to_column.get(setpoint_node)
        optim_node = self.lagrangian_nodes.get(base_state_node, {}).get('ieq')
        optim_state_col = self.node_to_column.get(optim_node)
        disturbance_col = self.node_to_column.get(disturbance_node)
        tout_col = self.node_to_column.get('Tout')
        qrad_col = self.node_to_column.get('Qrad')

        def reading(column):
            # An unmapped column yields None, which the pd.notna checks below
            # treat as "value unavailable".
            return data_row.get(column) if column is not None else None

        setpoint_val = reading(setpoint_col)
        tout_val = reading(tout_col)
        qrad_val = reading(qrad_col)
        print(f"DEBUG (Anomaly Check) - Columns: Setpoint='{setpoint_col}', Disturbance='{disturbance_col}', Optimizer='{optim_state_col}'")

        if not all(col in window_data.columns for col in [setpoint_col, disturbance_col, optim_state_col]):
            return {'error': "Missing one or more necessary data columns for anomaly analysis."}

        # --- 2. Core Logic: Check for a strong correlation and the optimizer's goal ---
        correlation = window_data[setpoint_col].corr(window_data[disturbance_col])
        avg_optim_state = window_data[optim_state_col].mean()
        # corr() yields NaN (not None) when it cannot be computed, e.g. for a
        # constant series; NaN must not count as a strong correlation.
        strongly_correlated = pd.notna(correlation) and correlation > 0.7

        # --- 3. SCENARIO-SPECIFIC ANOMALY SIGNATURES ---
        # SCENARIO 0: Nighttime energy saving (low temperature setpoint).
        if base_state_node == 'T' and (timestamp.hour < 6 or timestamp.hour > 21):  # Nighttime hours
            # Signature: Setpoint is low, it's cold outside, and it's dark.
            is_setpoint_low = pd.notna(setpoint_val) and setpoint_val < 18
            is_cold_outside = pd.notna(tout_val) and tout_val < 15
            is_dark = pd.notna(qrad_val) and qrad_val < 10

            if is_setpoint_low and is_cold_outside and is_dark:
                return {
                    'explanation': "This was a deliberate energy-saving strategy for the cold, dark, early morning hours.",
                    'evidence': {
                        "Economic Rationale": "The controller intentionally lowered the temperature setpoint to a safe minimum to avoid burning expensive energy for heating when there is no sunlight available for plant growth.",
                        "Environmental Context": f"This was a sensible strategy given the cold outside temperature of {tout_val:.1f}°C.",
                        "Conclusion": "This demonstrates an advanced, cost-effective optimization strategy, prioritizing energy conservation over maintaining a high temperature during non-productive hours."
                    }
                }
            # No match: fall through so the correlation-based check still runs.

        # SCENARIO 1: Temperature Anomaly (Energy-saving during a cold snap)
        if setpoint_node == 'T_ref':
            # Signature: Setpoint strongly follows outside temp, and optimizer is fighting the cold.
            if strongly_correlated and avg_optim_state < 0:
                return {
                    'explanation': "The unusual dip in the temperature setpoint was a sophisticated energy-saving maneuver in response to an unexpected drop in the outside temperature.",
                    'evidence': {
                        'External Influence': f"The setpoint pattern closely mirrored the outside temperature during this period (correlation: {correlation:.2f}).",
                        "Optimizer's Goal": f"The Temperature Lagrangian was consistently negative (avg: {avg_optim_state:.2E}), proving the controller's primary goal was to prevent the temperature from dropping too low while minimizing heating costs.",
                        'Conclusion': "Instead of burning excess energy to maintain a fixed target against the cold snap, the controller dynamically lowered its own setpoint to a still-safe level. This demonstrates an advanced, cost-effective optimization strategy."
                    }
                }

        # SCENARIO 2: Humidity Anomaly (Avoiding dehumidification costs when it's humid outside)
        elif setpoint_node == 'H_ref':
            # Signature: Setpoint follows outside humidity, and optimizer is fighting high humidity.
            if strongly_correlated and avg_optim_state > 0:
                return {
                    'explanation': "The unusual rise in the humidity setpoint was a cost-saving strategy to avoid fighting against very humid outdoor conditions.",
                    'evidence': {
                        'External Influence': f"The setpoint pattern closely followed the high outside humidity during this period (correlation: {correlation:.2f}).",
                        "Optimizer's Goal": f"The Humidity Lagrangian was consistently positive (avg: {avg_optim_state:.2E}), proving the controller was actively trying to prevent humidity from exceeding its maximum limit.",
                        'Conclusion': "Rather than using costly ventilation or dehumidification to fight the incoming moisture, the controller intelligently raised its own target to a higher but still-safe level. This is an advanced strategy to minimize energy consumption."
                    }
                }

        # SCENARIO 3: CO2 Anomaly (Avoiding waste when there is no light for photosynthesis)
        elif setpoint_node == 'C_ref':
            # Signature: Setpoint follows solar radiation, and optimizer is relaxed about the lower CO2 limit.
            if strongly_correlated and avg_optim_state < 0:
                return {
                    'explanation': "The dip in the CO2 setpoint was a resource-saving measure taken in response to low light conditions.",
                    'evidence': {
                        'External Influence': f"The CO2 setpoint pattern strongly tracked the available solar radiation (correlation: {correlation:.2f}).",
                        "Optimizer's Goal": f"The CO2 Lagrangian was negative (avg: {avg_optim_state:.2E}), indicating the controller was aware the CO2 level was below the ideal target but was not fighting hard to raise it.",
                        'Conclusion': "The controller recognized that without sufficient light, the plants cannot perform photosynthesis. Instead of wasting expensive CO2 by injecting it, the controller lowered its target to save resources until the sun returned. This is a smart, cost-effective strategy."
                    }
                }

        return {'error': "Could not identify a clear, correlated cause for the anomaly."}
    
    
    def explain_anomaly(self, primary_node: str, timestamp: datetime) -> str:
        """
        Describe the system state around a timestamp flagged as anomalous.

        Looks at the rows of ``self.data`` within two hours either side of
        ``timestamp``, picks the row closest in time, and reports the values of
        the base state variables, the control actions, 'Tout' and 'Qrad' for
        which a column is mapped and present.

        Args:
            primary_node: Node code of the variable the question is about.
            timestamp: Moment of interest.

        Returns:
            A markdown-style report; a "No data available" sentence when the
            window is empty; or an "Error analyzing anomaly" sentence when any
            exception is raised.
        """
        def display_name(node):
            return self.node_to_name.get(node, node)

        def unit_for(node):
            # Units are stored per data column, so map node -> column first.
            return self.feature_metadata.get(self.node_to_column.get(node, ''), {}).get('unit', '')

        try:
            half_span = timedelta(hours=2)
            window = self.data.loc[timestamp - half_span:timestamp + half_span]
            if window.empty:
                return f"No data available around {timestamp.strftime('%Y-%m-%d %H:%M')}."

            # Row nearest to the requested moment (earliest one wins a tie).
            nearest = min(window.index, key=lambda ts: abs((ts - timestamp).total_seconds()))

            # Collect the readings that are actually available at that row.
            readings = {}
            for node in self.base_state_variables + list(self.control_action_nodes) + ['Tout', 'Qrad']:
                column = self.node_to_column.get(node)
                if not column or column not in window.columns:
                    continue
                readings[node] = window.loc[nearest, column]

            pieces = [
                f"At {nearest.strftime('%Y-%m-%d %H:%M')}, the {display_name(primary_node)} showed an anomalous reading.\n\n"
            ]

            if primary_node in readings:
                unit = unit_for(primary_node)
                pieces.append(f"**Current Value:** {readings[primary_node]:.2f} {unit}\n\n")

            pieces.append("**Environmental Context:**\n")
            if 'Tout' in readings:
                pieces.append(f"- Outside Temperature: {readings['Tout']:.1f}°C\n")
            if 'Qrad' in readings:
                pieces.append(f"- Solar Radiation: {readings['Qrad']:.0f} W/m²\n")

            pieces.append("\n**Greenhouse Conditions:**\n")
            for node in self.base_state_variables:
                if node == primary_node or node not in readings:
                    continue
                unit = unit_for(node)
                pieces.append(f"- {display_name(node)}: {readings[node]:.2f} {unit}\n")

            pieces.append("\n**Control Actions:**\n")
            for node in self.control_action_nodes:
                if node not in readings:
                    continue
                unit = unit_for(node)
                pieces.append(f"- {display_name(node)}: {readings[node]:.2f} {unit}\n")

            pieces.append("\n**Analysis:**\n")
            pieces.append("This appears to be a normal operational condition given the environmental factors present at the time.")

            return "".join(pieces)

        except Exception as e:
            return f"Error analyzing anomaly: {str(e)}"
    
    
    def _interpret_system_event_changes(self, changes: Dict, context: Dict) -> Dict[str, Any]:
        """
        Classify a set of simultaneous parameter changes.

        The default verdict is a generic 'system_change' with an 'Unknown'
        cause. It is upgraded to 'strategy_shift' when a CO2-related variable
        was shut off and a humidity-related variable dropped by more than 10;
        the likely cause is then refined from the environmental context.

        Args:
            changes: Maps a variable key to a dict that may carry 'action'
                (e.g. 'shut_off') and/or a numeric 'change'.
            context: Environmental readings; 'Qrad' and 'Tout' are consulted.
                A missing or ``None`` 'Qrad' is treated as 0 and a missing or
                ``None`` 'Tout' as 20.

        Returns:
            A dict with 'event_type', 'description' and 'likely_cause'.
        """
        interpretation = {
            'event_type': 'system_change',
            'description': 'Multiple system parameters changed simultaneously',
            'likely_cause': 'Unknown'
        }

        # CO2 keys: the 'uC' control code, or any spelling of "co2". The
        # substring test is done on the lower-cased key, so the needle has to
        # be lower-case too (the previous 'CO2' needle could never match).
        co2_changes = [k for k in changes if 'uC' in k or 'co2' in k.lower()]
        # NOTE(review): 'H' matches every key containing a capital H, not only
        # humidity variables — confirm this breadth is intended.
        humidity_changes = [k for k in changes if 'H' in k or 'relhum' in k.lower()]

        if not (co2_changes and humidity_changes):
            return interpretation

        co2_shut_off = any(changes[k].get('action') == 'shut_off' for k in co2_changes)
        # `or 0` keeps a present-but-None change from raising on comparison.
        humidity_dropped = any((changes[k].get('change') or 0) < -10 for k in humidity_changes)

        if co2_shut_off and humidity_dropped:
            interpretation.update({
                'event_type': 'strategy_shift',
                'description': 'Shift from growth-focused to safety-focused operation',
                'likely_cause': 'Environmental conditions requiring safety prioritization over growth'
            })

            # Refine the cause from the environmental context.
            qrad = context.get('Qrad')
            if qrad is None:
                qrad = 0
            tout = context.get('Tout')
            if tout is None:
                tout = 20

            if qrad < 100:  # Low light
                interpretation['likely_cause'] = 'Low sunlight making CO2 injection ineffective'
            elif tout > 25:  # Hot outside
                interpretation['likely_cause'] = 'High outside temperature requiring humidity control'

        return interpretation
    
    def format_system_event_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Render a system-event analysis as newline-separated text for the LLM.

        Args:
            analysis_result: Either a dict with an 'error' entry, or a dict with
                'event_timestamp' (an object supporting ``strftime``) and the
                optional entries 'changes_detected', 'environmental_context'
                and 'analysis'.

        Returns:
            The error message when 'error' is present; otherwise a titled
            report listing the detected changes, the environmental readings
            with their units, and the interpretation.

        Note:
            Units are resolved through ``self.node_to_column`` because
            ``self.feature_metadata`` is keyed by data column elsewhere in this
            class; a direct lookup by the variable key is kept as a fallback.
        """
        if 'error' in analysis_result:
            return f"Error during system event analysis: {analysis_result['error']}"

        context = [f"--- System Event Analysis at {analysis_result['event_timestamp'].strftime('%Y-%m-%d %H:%M')} ---"]

        # Describe the changes
        changes = analysis_result.get('changes_detected', {})
        if changes:
            context.append("\n**Changes Detected:**")
            for var, change_info in changes.items():
                friendly_name = self.node_to_name.get(var, var)
                if 'action' in change_info:
                    # Only the two known actions are reported; any other action
                    # value is left out of the listing.
                    if change_info['action'] == 'shut_off':
                        context.append(f"- **{friendly_name}** was shut off (from {change_info['start_value']:.2f} to {change_info['end_value']:.2f})")
                    elif change_info['action'] == 'turned_on':
                        context.append(f"- **{friendly_name}** was turned on (from {change_info['start_value']:.2f} to {change_info['end_value']:.2f})")
                elif 'change' in change_info:
                    direction = "increased" if change_info['change'] > 0 else "decreased"
                    context.append(f"- **{friendly_name}** {direction} from {change_info['start_value']:.2f} to {change_info['end_value']:.2f}")

        # Environmental context
        env_context = analysis_result.get('environmental_context', {})
        if env_context:
            context.append("\n**Environmental Conditions at the Time:**")
            for var, value in env_context.items():
                friendly_name = self.node_to_name.get(var, var)
                # Try the column-keyed metadata first, then the raw key.
                column = self.node_to_column.get(var, var)
                metadata = self.feature_metadata.get(column) or self.feature_metadata.get(var) or {}
                unit = metadata.get('unit', '')
                context.append(f"- **{friendly_name}**: {value:.2f} {unit}")

        # Analysis interpretation
        analysis = analysis_result.get('analysis', {})
        if analysis:
            context.append(f"\n**Analysis:** {analysis.get('description', 'Unknown event')}")
            context.append(f"**Likely Cause:** {analysis.get('likely_cause', 'Unknown')}")

        return "\n".join(context)
    
    # NLP-based parser
    def parse_query_nlp(self, query: str) -> Dict[str, Any]:
        """
        (DEFINITIVE, FINAL, HIERARCHICAL PARSER V3)
        This final version contains a corrected and precisely ordered intent hierarchy
        that properly distinguishes between single control action queries, true multi-variable
        system events, and high-level strategy questions. This is the final version.
        """
        from datetime import datetime
        print(f"\n--- Parsing Query --- \nReceived Query: {query}")
        query_lower = query.lower()
        parsed_info = {
            'query': query, 'intent': 'unknown', 'target_nodes': [], 'primary_node': None,
            'variable_code': None, 'query_term': None, 'date': None, 'time': None,
            'start_time': None, 'end_time': None, 'pattern_type': None,
            'error': None, 'visualize_request': False
        }

        # --- 1. Entity and Time Extraction ---
        all_target_nodes_with_pos = self.extract_kg_entities(query)
        all_target_nodes = [node for node, pos in all_target_nodes_with_pos]
        parsed_info['target_nodes'] = all_target_nodes
        time_info = self.parse_temporal_expressions(query) # This now returns a dict
        time_expressions = time_info.get('datetimes', [])
        parsed_info['date'] = time_info.get('primary_date')
        parsed_info['datetimes'] = time_info.get('datetimes', [])

        # --- 2. Primary Node Selection (Position-Aware) ---
        # --- 2. REVISED Primary Node Selection Logic ---
        primary_node = None
        all_target_nodes_with_pos.sort(key=lambda x: x[1]) # Sort entities by their position in the query

        # ** NEW PRIORITY LOGIC **
        # Rule 1: If the user explicitly mentions a setpoint ('_ref') first, that is the subject.
        first_entity_node = all_target_nodes_with_pos[0][0] if all_target_nodes_with_pos else None
        if first_entity_node and '_ref' in first_entity_node:
            primary_node = first_entity_node
        else:
            # Rule 2 (Fallback): If no setpoint is first, prioritize the first-mentioned control action.
            control_actions_found = [node for node, pos in all_target_nodes_with_pos if self.get_control_action_base_code(node) in self.control_action_nodes]
            if control_actions_found:
                primary_node = self.get_control_action_base_code(control_actions_found[0])
            elif all_target_nodes:
                # Rule 3 (Final Fallback): Just take the first entity found.
                primary_node = all_target_nodes[0]
                
        parsed_info['primary_node'] = primary_node
        if primary_node:
            parsed_info['variable_code'] = primary_node
            parsed_info['query_term'] = self.node_to_name.get(primary_node, primary_node)
        
        print(f"DEBUG (Parser): Identified Primary Node='{primary_node}'")

        # --- 3. Time Resolution ---
        # Check for descriptive time range phrases
        descriptive_range_detected = False
        if "night and morning" in query_lower or "all night and morning" in query_lower:
            # Night and morning typically means ~00:00 to ~12:00
            start_time_obj = time(0, 0)  # midnight
            end_time_obj = time(12, 0)  # noon
            descriptive_range_detected = True
            print(f"DEBUG (Time Parser): Detected descriptive range 'night and morning' -> 00:00 to 12:00")
        elif "all night" in query_lower:
            # All night typically means ~18:00 to ~06:00 (evening to morning)
            start_time_obj = time(18, 0)  # 6 PM
            end_time_obj = time(6, 0)   # 6 AM
            descriptive_range_detected = True
            print(f"DEBUG (Time Parser): Detected descriptive range 'all night' -> 18:00 to 06:00")
        elif "during the night" in query_lower:
            # During the night typically means ~18:00 to ~06:00 (evening to morning)
            start_time_obj = time(18, 0)  # 6 PM
            end_time_obj = time(6, 0)   # 6 AM
            descriptive_range_detected = True
            print(f"DEBUG (Time Parser): Detected descriptive range 'during the night' -> 18:00 to 06:00")
        elif "all day" in query_lower:
            # All day means 00:00 to 23:59
            start_time_obj = time(0, 0)   # midnight
            end_time_obj = time(23, 59)  # almost midnight
            descriptive_range_detected = True
            print(f"DEBUG (Time Parser): Detected descriptive range 'all day' -> 00:00 to 23:59")
        elif "afternoon" in query_lower:
            # Afternoon means ~12:00 to 18:00
            start_time_obj = time(12, 0)  # noon
            end_time_obj = time(18, 0)    # 6 PM
            descriptive_range_detected = True
            print(f"DEBUG (Time Parser): Detected descriptive range 'afternoon' -> 12:00 to 18:00")
        elif "during the day" in query_lower:
            # During the day means ~06:00 to 18:00
            start_time_obj = time(6, 0)  # 6 AM
            end_time_obj = time(18, 0)  # 6 PM
            descriptive_range_detected = True
            print(f"DEBUG (Time Parser): Detected descriptive range 'during the day' -> 06:00 to 18:00")
        elif "right after midnight" in query_lower or "after midnight" in query_lower:
            # Right after midnight means ~00:00
            parsed_info['time'] = time(0, 0)
            print(f"DEBUG (Time Parser): Detected time 'right after midnight' -> 00:00")
        elif "midnight" in query_lower:
            # Midnight means exactly 00:00
            parsed_info['time'] = time(0, 0)
            print(f"DEBUG (Time Parser): Detected time 'midnight' -> 00:00")
        
        if descriptive_range_detected and parsed_info.get('date'):
            parsed_info.update({
                'start_time': start_time_obj, 
                'end_time': end_time_obj
            })
            if not parsed_info.get('time'):
                parsed_info['time'] = None  # Clear single time since we have a range
        
        # Process explicit time expressions
        if time_expressions:
            specific_dt = time_expressions[-1]['datetime']
            parsed_info.update({'date': specific_dt.date(), 'time': specific_dt.time()})
            print(f"DEBUG (Time Parser): Detected SINGLE timestamp: {specific_dt.time()}")

        is_range_query = ("between" in query_lower and "and" in query_lower) or \
                (len(time_expressions) > 1 and any(":" in t['text'] for t in time_expressions)) or \
                descriptive_range_detected

        if is_range_query:
            parsed_info['intent'] = 'pattern'
            
            # --- NEW LOGIC: DISTINGUISH PATTERN TYPE ---
            if 'gradual' in query_lower or 'steady' in query_lower or 'slowly' in query_lower:
                parsed_info['pattern_type'] = 'gradual_trend'
            elif 'spike' in query_lower or 'drop' in query_lower or 'sharp' in query_lower or 'sudden' in query_lower:
                parsed_info['pattern_type'] = 'sudden_event'

        if is_range_query:
            if len(time_expressions) >= 2:
                # Handle explicit time expressions (from/to, between/and)
                sorted_expressions = sorted(time_expressions, key=lambda x: x['datetime'])
                start_dt, end_dt = sorted_expressions[0]['datetime'], sorted_expressions[1]['datetime']
                parsed_info.update({'date': start_dt.date(), 'start_time': start_dt.time(), 'end_time': end_dt.time(), 'time': start_dt.time()})
                print(f"DEBUG (Time Parser): Detected RANGE from {start_dt.time()} to {end_dt.time()}")
            elif descriptive_range_detected:
                # Descriptive range already handled above, just log it
                print(f"DEBUG (Time Parser): Using descriptive range: {parsed_info.get('start_time')} to {parsed_info.get('end_time')}")
            # For other range queries without explicit times, they should be handled by the descriptive logic above
        # Time expressions already processed above

        # --- 3.5. Dialogue State Fallback for Date/Time (NEW LOGIC) ---
        has_previous_dialogue = hasattr(self, 'dialogue_state') and self.dialogue_state.get('last_timestamp') is not None
        user_mentioned_new_date = time_info.get('primary_date') is not None

        # Case 1: User provides a new date. We should NOT use the old time.
        if user_mentioned_new_date:
            print("DEBUG (Time Parser): New date mentioned. Ignoring previous time context.")
            pass # Correctly do nothing and let the time remain None if not specified
        elif not user_mentioned_new_date and has_previous_dialogue:
            last_timestamp = self.dialogue_state.get('last_timestamp')
            if last_timestamp:
                parsed_info['date'] = last_timestamp.date()
                if not parsed_info.get('time') and not is_range_query:
                    parsed_info['time'] = last_timestamp.time()
                print(f"DEBUG (Time Parser): Inherited date/time from dialogue state: {parsed_info['date']} {parsed_info.get('time')}")
        elif not parsed_info.get('date'):
            parsed_info['error'] = "Please specify a date for your question (e.g., 'on May 26, 2011')."

        why_keywords = ['why', 'reason', 'cause', 'explain', 'due to', 'because', 'what caused', 'what led to', 'how', 'triggered', 'what triggered','what happened to', 'was this', 'is this', 'did this', 'does this', 'good idea', 'worth it', 'what was that for','is this a sign of','purpose']
        strategy_keywords = ['strategy', 'reveal', 'management', 'approach', 'overall', 'synthesis', 'synthesize', 'summarize','why was the setpoint so high', 'all day', 'dipping to','during the day']
        relationship_keywords = ['relationship', 'connection', 'correlation', 'link between', 'related to', 'affect', 'impact', 'influence', 'interact', 'interplay']
        anomaly_keywords = ['unusual', 'strange', 'anomaly', 'anomalous', 'weird', 'fault','double-dip','error', 'massive spike', 'sharp drop','impossible', 'impossibly', 'instant jump', 'instantly jumped','strange dip',
            'data error', 'data artifact', 'sensor reset', 'vertical line','so low', 'so high', 'crazy', 'erratic', 'wild', 'chaotic', 'turning on and off', 'on and off', 'flickering']
        trade_off_keywords = ['prioritizing', 'priority', 'trade-off', 'vs', 'versus', 'balance between', 'or the', 'acceptable trade-off', 'worth it', 'primary purpose']
        evaluation_keywords = ['necessary', 'proof', 'how effectively', 'how well', 'impact on', 'without impacting', 'effect on', 'manage to','good day for growth', 'was it successful', 'performance', 'aggressive', 'aggressively', 'effective', 'was it effective', 'bad for plants', 'good for growth', 'is that a good idea', 'is this a good strategy', "isn't that bad", 'worth it']
        total_keywords = ['total', 'sum', 'integrate', 'how much', 'compare', 'contribution','did more work']
        find_event_keywords = ['moment of maximum', 'peak of', 'when was the highest', 'when was the lowest', 'coldest moment','maximum heat load', 'maximum cooling load']
        find_explain_keywords = ['find one of these events and explain', 'find a drop and explain', 'how did the system react to']
        obstacle_keywords = ['obstacle', 'preventing', 'slow', 'why did it take so long', 'what was the problem', 'what was stopping it']
        oscillation_keywords = ['noisy', 'noise', 'oscillation', 'oscillating', 'jagged', 'volatile', 'high-frequency', 'unstable', 'erratic', 'instability','tuning issue','jumping up and down', 'all over the place', 'swinging wildly', 'turning on and off', 'opening and closing']
        model_keywords = ['inaccurate', 'model mismatch', 'evidence that', 'compare the predicted', 'underestimate', 'overestimate', 'underestimating', 'overestimating','prediction error']
        lagrangian_state_keywords = ['imply', 'implication', 'mean', 'what does this mean', 'flat', 'near-zero', 'all day','signify','objective function', 'shadow price', 'constraint', 'equality constraint', 'internal model', 'signal']
        net_effect_keywords = ['net effect', 'opposing forces', 'balance between', 'combined effect']
        performance_keywords = ['performing', 'performance', 'how well']
        inaction_keywords = ["why didn't", "why was it not", "why was it off", "why was it zero"]
        trend_event_keywords = ['peak', 'peaked', 'drop', 'dropped', 'spike', 'spiked', 'rise', 'rose', 'fall', 'fell', 'change', 'changed']
        profitability_keywords = [
            'profitable', 'worth it', 'wasting money', 'cost-benefit',
            'economic', 'financial', 'fighting each other'
        ]
        efficiency_keywords = [
            'wasteful', 'inefficient', 'effective', 'efficiency', 'useful', 'pointless', 
            'necessary', 'unnecessary', 'beneficial', 'worthwhile', 'justified', 'waste'
        ]
        disease_keywords = ['disease', 'botrytis', 'risk', 'pressure', 'infection', 'fungal', 'mold', 'pathogen', 'condensation', 'dew', 'humidity risk', 'crop health', 'plant health']
        is_inaction_query = any(kw in query_lower for kw in inaction_keywords)
        is_evaluation_query = any(kw in query_lower for kw in evaluation_keywords)
        is_relationship_query = any(kw in query_lower for kw in relationship_keywords)
        is_causal_query = any(kw in query_lower for kw in why_keywords)
        is_strategy_query = any(kw in query_lower for kw in strategy_keywords)
        is_anomaly_query = any(kw in query_lower for kw in anomaly_keywords)
        is_trade_off_query = any(kw in query_lower for kw in trade_off_keywords)
        is_peak_query = 'peak' in query_lower or 'at' in query_lower
        is_net_effect_query = any(kw in query_lower for kw in net_effect_keywords)
        is_find_explain_query = any(kw in query_lower for kw in find_explain_keywords)
        is_total_query = any(kw in query_lower for kw in total_keywords)
        is_obstacle_query = any(kw in query_lower for kw in obstacle_keywords)
        is_oscillation_query = any(kw in query_lower for kw in oscillation_keywords)
        is_model_query = any(kw in query_lower for kw in model_keywords)
        is_day_query = parsed_info.get('date') and not parsed_info.get('time') and not is_range_query
        is_objective_query = 'objective' in query_lower or 'purpose' in query_lower
        is_profitability_query = any(kw in query_lower for kw in profitability_keywords)
        is_disease_query = any(kw in query_lower for kw in disease_keywords)
        is_efficiency_query = any(kw in query_lower for kw in efficiency_keywords)
        # --- 5. Intent Overrides Based on Definitive Patterns ---
        mentioned_controls = [node for node, pos in all_target_nodes_with_pos if self.get_control_action_base_code(node) in self.control_action_nodes]
        mentioned_base_states = {node.replace('_ref', '') for node in all_target_nodes if node.replace('_ref', '') in self.base_state_variables}
        # Identify mentioned fluxes and a potential target control action
        mentioned_fluxes = [node for node, pos in all_target_nodes_with_pos if 'phi_' in self.node_to_column.get(node, '').lower()]
        mentioned_disturbances = [node for node, pos in all_target_nodes_with_pos if self.kg.G.nodes[node].get('type') == 'Disturbance']
        mentioned_lagrangians = [node for node, pos in all_target_nodes_with_pos if '_ieq' in node or '_eq' in node]
        mentioned_setpoints = [node for node in all_target_nodes if '_ref' in node]
        mentioned_targets = [node for node, pos in all_target_nodes_with_pos if '_ref' in node or node in self.base_state_variables]
        mentioned_fluxes_or_states = [node for node, pos in all_target_nodes_with_pos if self.G.nodes[node].get('type') in ['Flux', 'Process', 'State']]
        mentioned_states = [node for node, pos in all_target_nodes_with_pos if node in self.base_state_variables]
        is_single_time_query = parsed_info.get('time') and not parsed_info.get('start_time')
        user_mentioned_specific_time = len(time_expressions) > 0
        is_day_query = parsed_info.get('date') is not None and not user_mentioned_specific_time and not is_range_query
        is_no_day_query = ('day' in query_lower and not parsed_info.get('time')) or \
               (parsed_info.get('start_time') and parsed_info.get('end_time') and \
               (parsed_info['end_time'].hour - parsed_info['start_time'].hour) > 4)
        is_lagrangian_explanation_query = any(kw in query_lower for kw in lagrangian_state_keywords)
        is_multi_action_query = (" and " in query_lower and len(set(mentioned_controls) | set(mentioned_targets)) >= 2)
        is_trend_event_query = any(kw in query_lower for kw in trend_event_keywords)
        unique_target_nodes = set(all_target_nodes)
        mentioned_actions_or_targets = {node for node, pos in all_target_nodes_with_pos if node in self.base_state_variables or self.get_control_action_base_code(node) in self.control_action_nodes}
        
        # PRIORITY: Check for Lagrangian state explanation queries FIRST (before trend/pattern checks)
        if mentioned_lagrangians and is_lagrangian_explanation_query:
            print("DEBUG - Intent Override: Detected a Lagrangian state explanation query.")
            parsed_info['intent'] = 'explain_lagrangian_state'
            # The user might provide a range, but for this intent, we analyze the whole day
            # So we ignore the specific time/range for the analysis function.
            parsed_info['time'] = None
            parsed_info['start_time'] = None
            parsed_info['end_time'] = None
            # The primary nodes are the Lagrangians themselves
            parsed_info['target_nodes'] = mentioned_lagrangians
            # Set a primary node for consistency, e.g., for plotting
            parsed_info['primary_node'] = mentioned_lagrangians[0]

        # HIGH PRIORITY: Check for disease risk assessment queries (before strategy checks)
        elif is_disease_query and parsed_info.get('date') and not (is_strategy_query or 'why' in query_lower):
            print("DEBUG - Intent Override: Detected a DISEASE RISK assessment query.")
            parsed_info['intent'] = 'analyze_disease_risk'

        # HIGH PRIORITY: Check for strategy explanation queries (after disease, before anomaly)
        elif is_strategy_query and is_no_day_query and mentioned_base_states and not ('good growing day' in query_lower or 'good day for growth' in query_lower or 'biomass' in query_lower or 'growth' in query_lower):
            print("DEBUG - Intent Override: Detected a high-level daily STRATEGY explanation query.")
            parsed_info['intent'] = 'explain_strategy'
            # Clear out any time-range info ONLY if it's genuinely a full-day query
            # If the user specified a specific time window (< 4 hours), keep it
            has_short_time_range = (parsed_info.get('start_time') and parsed_info.get('end_time') and 
                                   (parsed_info['end_time'].hour - parsed_info['start_time'].hour) <= 4)
            if not has_short_time_range:
                parsed_info['start_time'] = None
                parsed_info['end_time'] = None
            else:
                print(f"DEBUG - Preserving time range: {parsed_info['start_time']} to {parsed_info['end_time']}")
        
        # Check for specific anomaly explanation before general timestamp
        elif is_causal_query and is_anomaly_query and is_single_time_query and len(mentioned_states) == 1:
            print("DEBUG - Intent Override: Detected a specific state anomaly explanation query.")
            parsed_info['intent'] = 'explain_anomaly'
        
        # Check for biomass/growth day evaluation queries (prioritize this over generic strategy)
        # But don't override Lagrangian state queries
        elif parsed_info['intent'] != 'explain_lagrangian_state' and ('good growing day' in query_lower or 'good day for growth' in query_lower or 
            'biomass' in query_lower or 'growth' in query_lower) and parsed_info.get('date'):
            print("DEBUG - Intent Override: Detected a biomass/growth day evaluation query.")
            parsed_info['intent'] = 'evaluate_day_strategy'
        
        # Check for temperature-photosynthesis relationship queries
        if (parsed_info['intent'] == 'unknown' and 
            ('photosynthesis' in query_lower or 'too hot' in query_lower or 'optimal' in query_lower) and
            ('temperature' in query_lower or 'temp' in query_lower or '24' in query_lower or 'celsius' in query_lower)):
            print("DEBUG - Intent Override: Detected a temperature-photosynthesis relationship query.")
            parsed_info['intent'] = 'correlation'
            # Set up the correlation between temperature and photosynthesis
            parsed_info['target_nodes'] = ['T', 'C_phot']
        
        # Check for strategy evaluation queries
        if is_strategy_query and is_evaluation_query and primary_node and primary_node in mentioned_states:
            print("DEBUG - Intent Override: Detected a strategy evaluation query for a state variable.")
            parsed_info['intent'] = 'explain_strategy'
        
        # Check for system-wide event queries (what happened + time + change indicators + no entities)
        if (parsed_info['intent'] == 'unknown' and trend_event_keywords and
            is_single_time_query and 
            len(all_target_nodes) == 0 and  # No entities found
            ('happened' in query_lower or 'what happened' in query_lower) and
                ('change' in query_lower or 'changed' in query_lower or 'everything' in query_lower)):
            print("DEBUG - Intent Override: Detected a system-wide event query ('what happened' at specific time with change indicators).")
            parsed_info['intent'] = 'explain_system_event'
        
        # Check for specific causal timestamp query before other intent overrides
        if parsed_info['intent'] == 'unknown' and is_causal_query and mentioned_states and is_single_time_query and not is_anomaly_query:
            
            if is_trend_event_query:
                print("DEBUG - Intent Override: Detected a CAUSAL query about a specific EVENT (e.g., peak/drop). Routing to pattern analysis.")
                parsed_info['intent'] = 'pattern'
            else:
                print("DEBUG - Intent Override: Detected a CAUSAL query about a specific state. Routing to single-point context analysis.")
                parsed_info['intent'] = 'timestamp'
        
        if is_model_query and mentioned_controls:
            print("DEBUG - Intent Override: Detected a MODEL DISCREPANCY query.")
            parsed_info['intent'] = 'analyze_model_discrepancy'
            
            # --- Linking of Control and State ---
            control_entities = [item for item in all_target_nodes_with_pos if item[0] in mentioned_controls]
            state_entities = [item for item in all_target_nodes_with_pos if item[0] in mentioned_base_states]
            
            if state_entities:
                # Original logic when states are mentioned
                best_pair = (None, None)
                min_distance = float('inf')

                # Find the control/state pair that are closest to each other in the query
                for control_node, control_pos in control_entities:
                    for state_node, state_pos in state_entities:
                        distance = abs(control_pos - state_pos)
                        if distance < min_distance:
                            min_distance = distance
                            best_pair = (control_node, state_node)
                
                if best_pair[0] and best_pair[1]:
                     parsed_info['primary_node'] = best_pair[0]
                     parsed_info['target_nodes'] = [best_pair[1]]
                     print(f"DEBUG (Model Query): Intelligently linked control '{best_pair[0]}' with state '{best_pair[1]}'.")
                else:
                    # Fallback to the simpler logic if pairing fails
                    parsed_info['primary_node'] = mentioned_controls[0]
                    parsed_info['target_nodes'] = list(mentioned_base_states)
                    print(f"DEBUG (Model Query): Fallback to control '{mentioned_controls[0]}' with states {list(mentioned_base_states)}.")
            else:
                # No states mentioned - infer state from control
                control_node = mentioned_controls[0]
                base_control = self.get_control_action_base_code(control_node)
                
                # Infer the appropriate state variable based on the control
                if base_control == 'uQh':
                    inferred_state = 'T'  # Heating affects temperature
                elif base_control == 'uV':
                    # Ventilation affects both humidity and CO2, but prioritize based on query context
                    if 'humidity' in query_lower or 'botrytis' in query_lower:
                        inferred_state = 'H'
                    else:
                        inferred_state = 'C'  # Default to CO2 for ventilation
                elif base_control == 'uQc':
                    inferred_state = 'T'  # Cooling affects temperature
                else:
                    inferred_state = 'T'  # Default fallback
                
                parsed_info['primary_node'] = control_node
                parsed_info['target_nodes'] = [inferred_state]
                print(f"DEBUG (Model Query): Inferred state '{inferred_state}' for control '{control_node}' (no states explicitly mentioned).")

        elif any(kw in query_lower for kw in find_event_keywords):
            print("DEBUG - Intent Override: Detected a FIND AND ANALYZE EVENT query.")
            parsed_info['intent'] = 'find_and_analyze_event'
            
            # Dynamically find the event type from the query text
            event_type_found = None
            for event_key in self.extreme_event_configs.keys():
                if event_key in query_lower:
                    event_type_found = event_key
                    break
            
            if event_type_found:
                parsed_info['event_type'] = event_type_found
            else:
                # Could add more sophisticated logic here later
                parsed_info['error'] = "I understood you want to find an extreme event, but I don't know which one. Please be more specific (e.g., 'maximum heat load')."
        
        elif is_evaluation_query and is_day_query:
            print("DEBUG - Intent Override: Detected a daily strategy evaluation query.")
            parsed_info['intent'] = 'evaluate_day_strategy'

        elif is_profitability_query and len(set(mentioned_controls)) >= 2:
            print("DEBUG - Intent Override: Detected a PROFITABILITY & TRADE-OFF query.")
            parsed_info['intent'] = 'analyze_profitability_strategy'

        elif is_efficiency_query and mentioned_controls:
            print("DEBUG - Intent Override: Detected an EFFICIENCY query.")
            parsed_info['intent'] = 'analyze_control_efficiency'
            parsed_info['primary_node'] = mentioned_controls[0]

        elif is_find_explain_query and mentioned_disturbances and mentioned_controls:
            print("DEBUG - Intent Override: Detected a FIND AND EXPLAIN REACTION query.")
            parsed_info['intent'] = 'find_and_explain_reaction'
            parsed_info['primary_node'] = mentioned_disturbances[0] 
            parsed_info['target_nodes'] = mentioned_controls

        # CRITICAL: Check for causal anomaly BEFORE general relationship/correlation
        # This handles queries like "X dropped even though Y was high" or "X seems to drop sharply...even though Y"
        # But don't override Lagrangian state queries
        elif parsed_info['intent'] != 'explain_lagrangian_state' and is_causal_query and len(mentioned_fluxes_or_states) >= 2 and ('even though' in query_lower or 'despite' in query_lower or 'but' in query_lower):
            print("DEBUG - Intent Override: Detected a Causal Anomaly Explanation query (even though/despite/but pattern).")
            parsed_info['intent'] = 'explain_causal_anomaly'
            # The primary node is the one the user is asking about.
            parsed_info['primary_node'] = mentioned_fluxes_or_states[0]
        
        # Also check for anomaly patterns without explicit "even though" - e.g., "X drops sharply...is this a sign of stress?"
        # But don't override Lagrangian state queries, disease risk queries, or strategy queries
        elif parsed_info['intent'] not in ['explain_lagrangian_state', 'analyze_disease_risk', 'explain_strategy'] and is_causal_query and is_anomaly_query and len(mentioned_fluxes_or_states) >= 1:
            print("DEBUG - Intent Override: Detected a Causal Anomaly Explanation query (anomaly + causal pattern).")
            parsed_info['intent'] = 'explain_causal_anomaly'
            parsed_info['primary_node'] = mentioned_fluxes_or_states[0]

        elif any(kw in query_lower for kw in relationship_keywords) and len(all_target_nodes) >= 2:
            print("DEBUG - Intent Override: Detected a specific RELATIONSHIP/CORRELATION query.")
            parsed_info['intent'] = 'correlation'
            relationship_pair = self._extract_relationship_entities(query)
            if relationship_pair:
                parsed_info['target_nodes'] = relationship_pair
                parsed_info['primary_node'] = None
            else:
                parsed_info['target_nodes'] = all_target_nodes
                parsed_info['primary_node'] = None

        # PRIORITY: Check for simple causal control action queries BEFORE daily total
        # This prevents false positives from substring matches (e.g., "sum" in "summer")
        elif is_causal_query and mentioned_controls and is_single_time_query and not is_evaluation_query and not is_trade_off_query:
            # Only treat as analyze_daily_total if EXPLICIT total keywords are present
            explicit_total_keywords = ['total', 'how much', 'integrate', 'contribution', 'did more work']
            has_explicit_total = any(kw in query_lower for kw in explicit_total_keywords)
            
            if has_explicit_total:
                # This is actually a total query, let it fall through to the daily total check
                print("DEBUG - Intent Override: Has explicit total keyword despite single time. Will check daily total.")
            else:
                # This is a simple control action explanation
                print("DEBUG - Intent Override: Detected a CAUSAL query about a control action at a specific time.")
                parsed_info['intent'] = 'explain_control_action'
                parsed_info['primary_node'] = self.get_control_action_base_code(mentioned_controls[0])

        # CRITICAL: Check for daily total queries
        # This ensures "total" keyword queries get proper intent classification
        # BUT: Don't trigger for simple "why" questions at specific times with single controls  
        if parsed_info['intent'] == 'unknown' and is_total_query and (len(mentioned_fluxes) >= 1 or len(mentioned_controls) >= 1):
            # Additional check: If this is a simple causal query about a single control at a specific time,
            # it should be explain_control_action, NOT analyze_daily_total
            is_simple_control_why = (
                is_causal_query and 
                len(mentioned_controls) == 1 and 
                is_single_time_query and
                not any(explicit_kw in query_lower for explicit_kw in ['total', 'sum', 'how much', 'integrate', 'contribution', 'did more work'])
            )
            
            if is_simple_control_why:
                print("DEBUG - Intent Override: This looks like a simple control action question, NOT a daily total query.")
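                # Don't set intent here. NOTE: the elif branches chained to this daily-total check are skipped once this branch is taken, so only the later standalone checks can still assign an intent.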
                pass
            else:
                print("DEBUG - Intent Override: Detected a DAILY TOTAL & COMPARISON query.")
                parsed_info['intent'] = 'analyze_daily_total'
                
                # --- Start of New, Intelligent Node Selection ---
                
                # We will build a list of the specific fluxes the user is interested in.
                fluxes_for_analysis = []
                
                # 1. Add any fluxes the user mentioned by name.
                fluxes_for_analysis.extend(mentioned_fluxes)

                # 2. If the user mentioned a control system (e.g., "cooling system"),
                # find its corresponding primary flux and add it.
                control_to_flux_map = {
                    'uQc': 'Q_cool', # Active Cooling
                    'uQh': 'Q_heat', # Active Heating
                    'uV' : 'Q_vent',
                }
                for control_node in mentioned_controls:
                    if control_node in control_to_flux_map:
                        fluxes_for_analysis.append(control_to_flux_map[control_node])

                # 3. Add key natural fluxes if relevant keywords are in the query.
                if "transpiration" in query_lower or "sweating" in query_lower:
                    fluxes_for_analysis.append('Q_trans')
                if "sun" in query_lower or "solar" in query_lower:
                    fluxes_for_analysis.append('Q_sun')
                    
                # 4. Finalize the list by removing duplicates
                parsed_info['target_nodes'] = list(dict.fromkeys(fluxes_for_analysis))
                # A single primary node isn't relevant here
                parsed_info['primary_node'] = None

        elif is_causal_query and mentioned_controls and not is_trade_off_query and parsed_info['intent'] != 'analyze_daily_total': # Moved before evaluation to handle causal+evaluation queries
             print("DEBUG - Intent Override: Detected a CAUSAL query about a control action.")
             parsed_info['intent'] = 'explain_control_action'

        elif any(kw in query_lower for kw in evaluation_keywords) and mentioned_controls and (mentioned_base_states or mentioned_disturbances):
            print("DEBUG - Intent Override: Detected a PERFORMANCE EVALUATION query.")
            parsed_info['intent'] = 'evaluate_control_strategy'
            # Correctly set the primary node to the control action
            parsed_info['primary_node'] = self.get_control_action_base_code(mentioned_controls[0])
            # Order the state nodes as they appear in the query
            ordered_states = []
            sorted_entities = sorted(all_target_nodes_with_pos, key=lambda x: x[1])
            for node, pos in sorted_entities:
                base_node = node.replace('_ref', '')
                if base_node in self.base_state_variables and base_node not in ordered_states:
                    ordered_states.append(base_node)
            parsed_info['target_nodes'] = ordered_states

        elif is_causal_query and is_single_time_query and len(mentioned_actions_or_targets) >= 2 and len(set(self.get_control_action_base_code(c) for c in mentioned_controls)) >= 2:
            print("DEBUG - Intent Override: Detected a coordinated SYSTEM EVENT.")
            parsed_info['intent'] = 'explain_system_event'
        
        # NOTE: analyze_daily_total check moved earlier (before general causal control check) for proper priority
        
        elif is_causal_query and mentioned_controls and not is_evaluation_query and not is_trade_off_query: # This will now catch the CO2 spike query
             print("DEBUG - Intent Override: Detected a CAUSAL query about a control action.")
             parsed_info['intent'] = 'explain_control_action'
        
        elif is_inaction_query and mentioned_controls and is_range_query:
            print("DEBUG - Intent Override: Detected an INACTION query over a range. Finding the most critical moment.")
            
            # This is an inaction query. We need to find the point of MAXIMUM conflict within the range.
            control_node_of_interest = self.get_control_action_base_code(mentioned_controls[0])
            states_affected = self.get_states_typically_affected_by_control(control_node_of_interest)
            
            start_dt = datetime.combine(parsed_info['date'], parsed_info['start_time'])
            end_dt = datetime.combine(parsed_info['date'], parsed_info['end_time'])
            window_data = self.data.loc[start_dt:end_dt]

            max_conflict_score = -1
            critical_timestamp = None

            for state in states_affected:
                effect = self.get_effect_direction(control_node_of_interest, state)
                lg_node = self.lagrangian_nodes.get(state, {}).get('ieq')
                lg_col = self.node_to_column.get(lg_node)

                if effect == 'decrease' and lg_col in window_data:
                    # Conflict is when the lower limit is active (most negative Lagrangian)
                    conflict_point = window_data[lg_col].idxmin()
                    conflict_score = abs(window_data[lg_col].min())
                    if conflict_score > max_conflict_score:
                        max_conflict_score = conflict_score
                        critical_timestamp = conflict_point
            
            if critical_timestamp:
                print(f"DEBUG - Found most critical conflict point at {critical_timestamp}. Routing to single-event analysis.")
                # Override the time to be this single, critical moment
                parsed_info['time'] = critical_timestamp.time()
                parsed_info['start_time'] = None
                parsed_info['end_time'] = None
            
            # The intent is still a single control action explanation, just for a specific time.
            parsed_info['intent'] = 'explain_control_action'

        elif parsed_info['intent'] == 'unknown' and is_causal_query and mentioned_states and is_peak_query and is_single_time_query:
            print("DEBUG - Intent Override: Detected a CAUSAL query about a specific state. Routing to single-point context analysis.")
            parsed_info['intent'] = 'timestamp'

        elif is_causal_query and mentioned_controls and is_single_time_query:
            print("DEBUG - Intent Override: Detected a CAUSAL query about a control action.")
            parsed_info['intent'] = 'explain_control_action'
            parsed_info['primary_node'] = self.get_control_action_base_code(mentioned_controls[0])

        elif is_causal_query and 'sacrifice' in query_lower and len(mentioned_controls) >= 2:
            print("DEBUG - Intent Override: Detected a complex 'Sacrifice Growth for Safety' query.")
            parsed_info['intent'] = 'explain_control_action' # We can reuse this intent
            
            # CRITICAL: Set the primary node to the SAFETY action (ventilation/cooling),
            # as this is the action that overrides the growth strategy.
            if 'ventilation' in query_lower or 'Vent_ref' in query:
                parsed_info['primary_node'] = 'uV'
                parsed_info['variable_code'] = 'uV'
                parsed_info['query_term'] = self.node_to_name.get('uV')
            elif 'cooling' in query_lower or 'cool_ref' in query:
                parsed_info['primary_node'] = 'uQc'
                parsed_info['variable_code'] = 'uQc'
                parsed_info['query_term'] = self.node_to_name.get('uQc')

        # PRIORITY 1: Oscillation queries (check before general anomaly)
        # But don't override Lagrangian state queries
        if parsed_info['intent'] != 'explain_lagrangian_state' and is_oscillation_query:
            if (is_range_query or (parsed_info.get('date') and not parsed_info.get('time') and not parsed_info.get('start_time') and not parsed_info.get('end_time'))) and len(all_target_nodes) >= 1:
                print("DEBUG - Intent Override: Detected a Volatility and Performance Analysis query.")
                parsed_info['intent'] = 'analyze_volatility'
            elif time_expressions and len(all_target_nodes) >= 1:
                print("DEBUG - Intent Override: Detected an EXPLAIN OSCILLATION query.")
                parsed_info['intent'] = 'explain_oscillation'

        # PRIORITY 1: General anomaly query (catches anomaly queries that don't have specific time constraints)
        # But don't override Lagrangian state queries
        elif parsed_info['intent'] != 'explain_lagrangian_state' and is_causal_query and is_anomaly_query and is_single_time_query and len(mentioned_states) == 1:
            print("DEBUG - Intent Override: Detected a general anomaly query.")
            parsed_info['intent'] = 'explain_anomaly'
            parsed_info['primary_node'] = mentioned_states[0]

        # CRITICAL: Only match setpoint evaluation if NO controls are mentioned
        # If controls are mentioned, it's about the control's effectiveness, not the setpoint's rationale
        elif any(kw in query_lower for kw in evaluation_keywords) and mentioned_setpoints and not mentioned_controls:
            print("DEBUG - Intent Override: Detected a Setpoint Strategy Evaluation query.")
            parsed_info['intent'] = 'evaluate_setpoint_strategy'
            # Make sure the primary node IS the setpoint - prioritize actual setpoint nodes over state nodes
            setpoint_nodes = [node for node in mentioned_setpoints if '_ref' in node]
            if setpoint_nodes:
                primary_setpoint_node = setpoint_nodes[0]
            else:
                primary_setpoint_node = mentioned_setpoints[0]
            parsed_info['primary_node'] = primary_setpoint_node
            parsed_info['variable_code'] = primary_setpoint_node
            parsed_info['query_term'] = self.node_to_name.get(primary_setpoint_node, primary_setpoint_node)

        elif len(set(mentioned_controls)) >= 2 and is_objective_query and parsed_info.get('time'):
            print("DEBUG - Intent Override: Detected a multi-action objective query. Upgrading to 'explain_control_action'.")
            parsed_info['intent'] = 'explain_control_action'

        elif is_obstacle_query and mentioned_base_states and time_expressions:
            print("DEBUG - Intent Override: Detected an EXPLAIN OBSTACLE query.")
            parsed_info['intent'] = 'explain_obstacle'
            parsed_info['primary_node'] = list(mentioned_base_states)[0]

            start_dt = time_expressions[0]['datetime']
            end_dt = start_dt + timedelta(hours=4)
            parsed_info['date'] = start_dt.date()
            parsed_info['start_time'] = start_dt.time()
            parsed_info['end_time'] = end_dt.time()
            print(f"DEBUG (Obstacle Parser): Created analysis window from single point: {start_dt.time()} to {end_dt.time()}")

        # Check for strategy shift queries that might not have "why" keywords
        elif 'strategy shift' in query_lower or 'strategy change' in query_lower or ('sacrificed' in query_lower and 'safety' in query_lower):
            print("DEBUG - Intent Override: Detected a strategy shift explanation query.")
            parsed_info['intent'] = 'explain_strategy_shift'
            # Set default nodes - the handler will determine the specifics
            parsed_info['primary_node'] = 'uV'  # Default to ventilation
            parsed_info['target_nodes'] = ['uC']  # CO2 as target

        # --- NET EFFECT HANDLING (Corrected Logic) ---
        elif is_net_effect_query and len(mentioned_fluxes) >= 2 and mentioned_controls:
            print("DEBUG - Intent Override: Detected a NET EFFECT query.")
            parsed_info['intent'] = 'explain_net_effect'
            
            # <<< --- THE DEFINITIVE FIX IS HERE --- >>>
            # Context-aware selection of the target control.
            # If the user says "cooling demand", we MUST choose 'uQc'.
            target_control_node = None
            if 'cooling' in query_lower or 'cool' in query_lower:
                target_control_node = 'uQc'
            elif 'heating' in query_lower or 'heat' in query_lower:
                target_control_node = 'uQh'
            else:
                # Fallback to the first one found if no specific context is available
                target_control_node = self.get_control_action_base_code(mentioned_controls[0])
                
            print(f"DEBUG (Net Effect Parser): Contextually selected '{target_control_node}' as the target control.")
            
            parsed_info['primary_node'] = target_control_node
            parsed_info['target_nodes'] = mentioned_fluxes

        elif any(kw in query_lower for kw in evaluation_keywords) and mentioned_controls and len(mentioned_base_states) >= 1:
            print("DEBUG - Intent Override: Detected a PERFORMANCE EVALUATION query.")
            # Check if this is a full-day query (no specific time range)
            has_time_range = parsed_info.get('start_time') is not None and parsed_info.get('end_time') is not None
            is_full_day_eval = parsed_info.get('date') and not has_time_range and not user_mentioned_specific_time
            
            if is_full_day_eval:
                # Route to full-day strategy evaluation
                print("DEBUG - Routing to evaluate_day_strategy (full day performance evaluation)")
                parsed_info['intent'] = 'evaluate_day_strategy'
                # For full-day evaluation, we analyze all mentioned variables
                ordered_states = []
                sorted_entities = sorted(all_target_nodes_with_pos, key=lambda x: x[1])
                for node, pos in sorted_entities:
                    base_node = node.replace('_ref', '')
                    if base_node in self.base_state_variables and base_node not in ordered_states:
                        ordered_states.append(base_node)
                parsed_info['target_nodes'] = ordered_states if ordered_states else list(self.base_state_variables)
            else:
                # Route to time-range-specific control strategy evaluation
                parsed_info['intent'] = 'evaluate_control_strategy'
                parsed_info['primary_node'] = self.get_control_action_base_code(mentioned_controls[0])
                ordered_states = []
                sorted_entities = sorted(all_target_nodes_with_pos, key=lambda x: x[1])
                for node, pos in sorted_entities:
                    base_node = node.replace('_ref', '')
                    if base_node in self.base_state_variables and base_node not in ordered_states:
                        ordered_states.append(base_node)
                
                parsed_info['target_nodes'] = ordered_states
        
        # Check for control saturation queries (independent of trade-off keywords)
        elif any(kw in query_lower for kw in ['pegged at maximum', 'pegged at max', 'saturated', 'saturation', 'maximum value', 'max value', 'at maximum', 'at max','100% capacity', 'flat at max']) and mentioned_controls:
            print("DEBUG - Intent Override: Detected CONTROL SATURATION query (control pegged at maximum).")
            parsed_info['intent'] = 'analyze_control_saturation'
            parsed_info['control_node'] = self.get_control_action_base_code(mentioned_controls[0])
        
        # PRIORITY 1c: A true system event (must be explicit to avoid misfiring)
        elif is_causal_query and not is_trade_off_query and ("coordinated" in query_lower or "system-wide" in query_lower or len([n for n in all_target_nodes if '_ref' in n]) >= 2):
            print("DEBUG - Intent Override: Detected a coordinated system event.")
            parsed_info['intent'] = 'explain_system_event'
        
        
        elif parsed_info['intent'] == 'unknown' and is_trade_off_query and len(unique_target_nodes) >= 2:
            print("DEBUG - Intent Override: Detected a specific TRADE-OFF/PRIORITY query.")
            parsed_info['intent'] = 'explain_trade_off'
            parsed_info['target_nodes'] = list(mentioned_base_states)
        
        # PRIORITY N+1: A "control action" query is a causal question that mentions at least one control.
        # CRITICAL: Don't override if already classified as analyze_daily_total
        elif is_causal_query and mentioned_controls and parsed_info['intent'] != 'analyze_daily_total':
            print("DEBUG - Intent Override: Detected a CAUSAL query about control actions.")
            parsed_info['intent'] = 'explain_control_action'

        elif (any(kw in query_lower for kw in trade_off_keywords) and len(set(all_target_nodes)) >= 2) or ("or" in query_lower and len(set(all_target_nodes)) == 2) or (is_oscillation_query and len(set(all_target_nodes)) >= 2):
            # Normalize nodes to base variables (remove _ref, _ieq, etc.) to avoid counting variants as separate variables
            unique_base_variables = set()
            for node in all_target_nodes:
                # Remove common suffixes to get base variable name
                base_var = node.replace('_ref', '').replace('_ieq', '').replace('_eq', '')
                unique_base_variables.add(base_var)
            
            # Only consider it a trade-off if there are truly different base variables
            if len(unique_base_variables) >= 2:
                # Special case: If one of the nodes is a Lagrangian (ends with _ieq), this is control saturation analysis
                lagrangian_nodes = [node for node in all_target_nodes if node.endswith('_ieq')]
                control_nodes = [node for node in all_target_nodes if not node.endswith('_ieq') and node.startswith('u')]
                
                if lagrangian_nodes and control_nodes and len(lagrangian_nodes) == 1 and len(control_nodes) == 1:
                    # This is control saturation: e.g., uC_ieq and uC
                    print("DEBUG - Intent Override: Detected CONTROL SATURATION query (Lagrangian pegged at max).")
                    parsed_info['intent'] = 'analyze_control_saturation'
                    parsed_info['control_node'] = control_nodes[0]
                    parsed_info['lagrangian_node'] = lagrangian_nodes[0]
                else:
                    if parsed_info['intent'] == 'unknown':
                        print("DEBUG - Intent Override: Detected a specific TRADE-OFF/PRIORITY or MULTI-VARIABLE OSCILLATION query.")
                        parsed_info['intent'] = 'explain_trade_off'
                        parsed_info['target_nodes'] = list(set(all_target_nodes))  # Deduplicate

        elif any(kw in query_lower for kw in why_keywords) and mentioned_controls and not is_anomaly_query:
            # Check if this is actually a strategy shift query before defaulting to control action
            co2_off = ('uc' in query_lower and ('shut off' in query_lower or 'shut down' in query_lower)) or ('co2' in query_lower and ('shut off' in query_lower or 'shut down' in query_lower))
            safety_on = ('ventilation' in query_lower and ('ramped up' in query_lower or 'increased' in query_lower or 'ramp up' in query_lower or 'increase' in query_lower)) or ('cooling' in query_lower and ('ramped up' in query_lower or 'increased' in query_lower or 'ramp up' in query_lower))
            strategy_keywords = any(kw in query_lower for kw in ['strategy shift', 'strategy change', 'shifted from', 'switched to', 'sacrificed', 'prioritized safety'])
            
            print(f"DEBUG - Strategy shift check: mentioned_controls={mentioned_controls}, len={len(mentioned_controls)}, co2_off={co2_off}, safety_on={safety_on}, strategy_keywords={strategy_keywords}, sacrifice={'sacrifice' in query_lower}")
            
            # Strategy shift detection: either multiple controls with patterns OR strategy keywords OR CO2+ventilation pattern
            is_strategy_shift = (len(mentioned_controls) >= 2 and ('sacrifice' in query_lower or (co2_off and safety_on))) or strategy_keywords or (co2_off and safety_on)
            
            if is_strategy_shift:
                print("DEBUG - Intent Override: Detected a complex strategy shift query.")
                parsed_info['intent'] = 'explain_strategy_shift'
                # For strategy shift, we need both the growth action (CO2) and safety action (ventilation/cooling)
                parsed_info['target_nodes'] = ['uC']  # CO2 injection that was shut off
                if 'ventilation' in query_lower:
                    parsed_info['primary_node'] = 'uV'  # Safety action that was ramped up
                elif 'cooling' in query_lower:
                    parsed_info['primary_node'] = 'uQc'
                else:
                    parsed_info['primary_node'] = 'uV'  # Default to ventilation
            else:
                # CRITICAL: Don't override if already classified as analyze_daily_total
                if parsed_info['intent'] != 'analyze_daily_total':
                    print("DEBUG - Intent Override: Detected a CAUSAL query about control actions.")
                    parsed_info['intent'] = 'explain_control_action'
                    parsed_info['primary_node'] = self.get_control_action_base_code(mentioned_controls[0])
                    # Include all mentioned control actions as target nodes for multi-variable plotting
                    if len(mentioned_controls) > 1:
                        parsed_info['target_nodes'] = [self.get_control_action_base_code(ctrl) for ctrl in mentioned_controls[1:]]

        # PRIORITY 1: A question about a specific, unusual pattern (anomaly).
        elif parsed_info['intent'] not in ['analyze_disease_risk', 'explain_strategy'] and is_anomaly_query and is_range_query:
            print("DEBUG - Intent Override: Detected a specific anomaly explanation query.")
            parsed_info['intent'] = 'explain_anomaly'
            all_setpoint_nodes = [node for node, data in self.kg.G.nodes(data=True) if data.get('type') == 'Reference']

            # 2. Find the first setpoint that was mentioned in the user's query.
            found_setpoint_node = None
            # We iterate through the nodes in the order they appeared in the query.
            for node in all_target_nodes:
                if node in all_setpoint_nodes:
                    found_setpoint_node = node
                    break # Stop as soon as we find the first one.

            # 3. If we found a mentioned setpoint, make it the primary focus of the query.
            if found_setpoint_node:
                print(f"DEBUG - Anomaly query identified. Prioritizing setpoint: {found_setpoint_node}")
                parsed_info['primary_node'] = found_setpoint_node
                # The variable_code is the same as the node ID for setpoints.
                parsed_info['variable_code'] = found_setpoint_node
                # Get the user-friendly name from our central mapping.
                parsed_info['query_term'] = self.node_to_name.get(found_setpoint_node, found_setpoint_node)
            else:
                print("DEBUG - Anomaly query detected, but no specific setpoint was mentioned.")

        # PRIORITY 2: High-level strategy query (The old Priority 1)
        elif is_strategy_query and len(all_target_nodes) > 1:
            print("DEBUG - Intent Override: Detected a multi-variable strategy query.")
            parsed_info['intent'] = 'explain_strategy'
            # Only clear times if this is a full-day query, not a specific time window
            has_short_time_range = (parsed_info.get('start_time') and parsed_info.get('end_time') and 
                                   (parsed_info['end_time'].hour - parsed_info['start_time'].hour) <= 4)
            if not has_short_time_range:
                parsed_info.update({'time': None, 'start_time': None, 'end_time': None})
            else:
                print(f"DEBUG - Preserving time range for multi-variable strategy: {parsed_info['start_time']} to {parsed_info['end_time']}")
                # Keep existing start_time and end_time, just clear 'time' if present
                parsed_info['time'] = None

        # PRIORITY 1c: A true system event (must be explicit to avoid misfiring)
        # PRIORITY 1b: A relationship/correlation query involving multiple variables
        # CRITICAL: Don't override if already classified as explain_causal_anomaly
        elif (is_relationship_query or ("or" in query_lower and len(unique_target_nodes) >= 2)) and len(unique_target_nodes) >= 2 and parsed_info['intent'] != 'explain_causal_anomaly':
            print("DEBUG - Intent Override: Detected a relationship/correlation or comparative query.")
            parsed_info['intent'] = 'correlation'

            # For relationship queries, use our precise extractor.
            relationship_pair = self._extract_relationship_entities(query)
            if relationship_pair:
                print(f"DEBUG - Precisely extracted relationship pair: {relationship_pair}")
                # OVERRIDE the general target_nodes with our precise pair.
                parsed_info['target_nodes'] = relationship_pair
                # For this intent, a single "primary_node" is meaningless.
                parsed_info['primary_node'] = None
                parsed_info['variable_code'] = None
                parsed_info['query_term'] = None

        # PRIORITY 4: Fallback to standard intents
        if parsed_info['intent'] == 'unknown':
            vis_keywords = ['show', 'plot', 'visualize', 'graph', 'chart']
            pattern_keywords = ['increase', 'decrease', 'go up', 'go down', 'rise', 'drop', 'spike', 'fall', 'change', 'trend', 'pattern']
            
            if is_anomaly_query:
                parsed_info['intent'] = 'explain_anomaly'
            elif is_causal_query and any(kw in query_lower for kw in pattern_keywords):
                parsed_info['intent'] = 'pattern'
            elif any(kw in query_lower for kw in vis_keywords):
                parsed_info['intent'] = 'visualize'
            elif (is_range_query or any(kw in query_lower for kw in pattern_keywords)) and all_target_nodes:
                parsed_info['intent'] = 'pattern'
            else:
                parsed_info['intent'] = 'timestamp'

        print(f"DEBUG - NLP Detected Intent: {parsed_info['intent']}")
        
        # Final fix: If intent is explain_trade_off but all target_nodes are the same control, change to explain_control_action
        if parsed_info['intent'] == 'explain_trade_off' and 'target_nodes' in parsed_info:
            unique_targets = list(set(parsed_info['target_nodes']))
            # Normalize control nodes to base codes
            normalized_unique = []
            for node in unique_targets:
                if node.startswith('u'):
                    normalized_unique.append(self.get_control_action_base_code(node))
                else:
                    normalized_unique.append(node)
            normalized_unique = list(set(normalized_unique))
            if len(normalized_unique) == 1 and normalized_unique[0].startswith('u'):
                print("DEBUG - Intent Override: Single control variable with trade-off intent, changing to explain_control_action")
                parsed_info['intent'] = 'explain_control_action'
                parsed_info['primary_node'] = normalized_unique[0]
                parsed_info['target_nodes'] = [normalized_unique[0]]
        
        return parsed_info

    def load_data(self, data_path):
        """
        Load a CSV file and index it by a synthesized per-row timestamp.

        The file must contain a 'Date' column. Dates are parsed as YYYY-MM-DD
        first, then as DD-MM-YYYY, then with pandas' own format inference; a
        later attempt is made only when the previous one parsed no row at all.
        Rows whose date cannot be parsed are dropped.

        Rows are grouped by calendar day and the n-th row of a day (in file
        order, n starting at 0) receives the timestamp
        ``midnight + 5 minutes * n``.

        Args:
            data_path: Location of the CSV file, handed to `pd.read_csv`.

        Returns:
            pd.DataFrame: Frame indexed by 'Timestamp' with the 'Date' column
            removed and every remaining column coerced to numeric
            (unparseable values become NaN). An empty DataFrame is returned,
            and the reason printed, when the file is missing, has no 'Date'
            column, has no parseable date, or any other error occurs.
        """
        try:
            data = pd.read_csv(data_path, low_memory=False)
            if 'Date' not in data.columns:
                print("Error: 'Date' column not found in the CSV file.")
                return pd.DataFrame()

            # Try the known formats in order; fall through only on total failure.
            parsed_dates = pd.to_datetime(data['Date'], format='%Y-%m-%d', errors='coerce')
            if parsed_dates.isna().all():
                parsed_dates = pd.to_datetime(data['Date'], format='%d-%m-%Y', errors='coerce')
                if parsed_dates.isna().all():
                    parsed_dates = pd.to_datetime(data['Date'], errors='coerce')  # Try inferring format

            # Inferred parsing may yield timezone-aware values; keep the
            # wall-clock date and a naive index.
            if getattr(parsed_dates.dtype, 'tz', None) is not None:
                parsed_dates = parsed_dates.dt.tz_localize(None)

            data['Parsed_Date'] = parsed_dates
            data = data.dropna(subset=['Parsed_Date'])  # Drop rows where date couldn't be parsed

            if data.empty:
                print("Error: No valid dates found after parsing.")
                return pd.DataFrame()

            # A stable sort is essential: every row of a day shares the same
            # sort key, and the file order within the day is the only record
            # of the sampling sequence. The default quicksort may shuffle it.
            data = data.sort_values('Parsed_Date', kind='mergesort').reset_index(drop=True)

            # Position of each row within its calendar day -> 5-minute slot.
            day_start = data['Parsed_Date'].dt.normalize()
            slot_in_day = data.groupby(day_start).cumcount()
            data['Timestamp'] = day_start + pd.to_timedelta(slot_in_day * 5, unit='min')

            data.set_index('Timestamp', inplace=True)
            # Ensure the index is DatetimeIndex
            data.index = pd.to_datetime(data.index)
            data.sort_index(inplace=True, kind='mergesort')
            data.drop(columns=['Date', 'Parsed_Date'], inplace=True)  # Remove original/parsed Date columns
            print(f"Data loaded successfully. Index type: {type(data.index)}, Range: {data.index.min()} to {data.index.max()}")
            print(f"Total rows: {len(data)}, Date range: {data.index.date.min()} to {data.index.date.max()}")

            # Every remaining column is expected to hold measurements.
            for col in list(data.columns):
                data[col] = pd.to_numeric(data[col], errors='coerce')

            return data
        except FileNotFoundError:
            print(f"Error: CSV file not found at {data_path}")
            return pd.DataFrame()
        except Exception as e:
            # Deliberate catch-all: callers rely on an empty frame, not an exception.
            print(f"Error loading data: {e}")
            return pd.DataFrame()

    def get_variable_info(self, query_term: str) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """
        Resolve a query term to its KG node, data column and user-friendly name.

        The term is first looked up (lower-cased) in `self.name_to_code`; when
        it has no entry the term itself is taken as the candidate node. If the
        candidate is not a node of `self.kg.G` but the raw term is, the raw
        term is used instead. The data column comes from `self.node_to_column`
        and the display name from `self.node_to_name` (falling back to the
        node id).

        Args:
            query_term (str): The term from the user's query (e.g., "temperature", "uV").

        Returns:
            Tuple[Optional[str], Optional[str], Optional[str]]:
            (kg_node, data_column, friendly_name). All three are None when the
            term is not a string, the knowledge graph is unavailable, the node
            is not in the graph, or the node has no data column.
        """
        if not isinstance(query_term, str):
            print(f"WARN - get_variable_info: Query term must be a string, got {query_term!r}.")
            return None, None, None

        # Handles both friendly names ("temperature") and direct codes ("T").
        kg_node = self.name_to_code.get(query_term.lower(), query_term)

        print("\n--- DEBUGGING has_node ---")
        print(f"Candidate KG Node: '{kg_node}'")
        print(f"Is self.kg valid? {self.kg is not None}")
        if self.kg:
            print(f"Is self.kg.G a valid graph? {hasattr(self.kg, 'G') and self.kg.G is not None}")
            if hasattr(self.kg, 'G') and self.kg.G:
                print(f"Number of nodes in graph: {len(self.kg.G.nodes())}")
                print(f"Does the graph have the node '{kg_node}'? {self.kg.G.has_node(kg_node)}")
        print("--------------------------\n")

        # Without a graph nothing can be resolved; report failure instead of
        # raising AttributeError on the lookups below.
        graph = getattr(self.kg, 'G', None) if self.kg is not None else None
        if graph is None:
            print(f"WARN - get_variable_info: Knowledge graph is not available; cannot resolve '{query_term}'.")
            return None, None, None

        # The term may itself be a KG node id that name_to_code mapped elsewhere.
        if not graph.has_node(kg_node) and graph.has_node(query_term):
            kg_node = query_term

        data_column = None
        friendly_name = None
        if graph.has_node(kg_node):
            data_column = self.node_to_column.get(kg_node)
            friendly_name = self.node_to_name.get(kg_node, kg_node)

        # If we couldn't find a mapping, the inputs might be invalid
        if not data_column:
            print(f"WARN - get_variable_info: Could not find a data column for the term '{query_term}' (resolved to node '{kg_node}').")
            # Return None for all to signal failure
            return None, None, None

        return kg_node, data_column, friendly_name

    def explain_pattern(self, variable: str, start_time: str = None, end_time: str = None,
                        date: date = None, pattern_type: str = None) -> Dict[str, Any]:
        """
        Run the complete pattern analysis for one variable over a day or a time window.

        The method resolves `variable` through `get_variable_info`, cuts the
        matching rows out of `self.data`, asks `detect_patterns` for summary
        statistics and sudden changes, enriches every sudden change that
        carries a timestamp with flux context and local correlations, stores
        all sudden changes in `self.dialogue_state['last_detected_events']`,
        and finally computes correlations over the whole slice.

        Args:
            variable: Term identifying the variable; passed to `get_variable_info`.
            start_time: Window start as 'HH:MM'; honoured only together with `end_time`.
            end_time: Window end as 'HH:MM'; honoured only together with `start_time`.
            date: Day to analyse. Without both times, all rows of that day are used.
            pattern_type: Accepted but not read by this method.

        Returns:
            A dict with the keys 'overall_pattern_summary',
            'notable_events_analysis' and 'overall_correlations_for_period',
            or ``{'error': message}`` when the variable cannot be resolved,
            slicing fails or yields no rows, or `detect_patterns` reports an
            error.
        """
        from datetime import datetime
        print(f"DEBUG - explain_pattern: Analyzing '{variable}' on {date} from {start_time} to {end_time}")

        def _on_day(hhmm):
            # Place an 'HH:MM' string on the requested day.
            return datetime.combine(date, datetime.strptime(hhmm, '%H:%M').time())

        # --- 1. Resolve the variable and select the rows to analyse ---
        kg_node, data_col, friendly_name = self.get_variable_info(variable)
        if not (kg_node and data_col):
            return {"error": f"Could not resolve variable '{variable}' to a valid data column."}

        try:
            if start_time and end_time:
                window_start = _on_day(start_time)
                window_end = _on_day(end_time)
                data_slice = self.data.loc[window_start:window_end].copy()
            else:
                on_requested_day = self.data.index.date == date
                data_slice = self.data[on_requested_day].copy()

            if data_slice.empty:
                return {"error": "No data available for the specified period."}
        except Exception as e:
            return {"error": f"Error slicing data for the specified period: {e}"}

        # --- 2. Pattern statistics for the column ---
        patterns = self.detect_patterns(data_col, start_time, end_time, date)
        if "error" in patterns:
            return {"error": patterns["error"]}

        first_stamp = data_slice.index.min()
        last_stamp = data_slice.index.max()
        overall_pattern_summary = {
            'variable_name': friendly_name,
            'period_start': first_stamp.strftime('%H:%M'),
            'period_end': last_stamp.strftime('%H:%M'),
            'overall_change': patterns.get('change'),
            'mean_value': patterns.get('mean'),
            'min_value': patterns.get('min'),
            'max_value': patterns.get('max'),
        }

        # --- 3. Per-event deep dive (events without a timestamp are skipped) ---
        sudden_changes = patterns.get('sudden_changes', [])
        notable_event_analysis_list = []
        for change in sudden_changes:
            stamp = change.get('timestamp')
            if stamp:
                moment = _on_day(stamp)
                notable_event_analysis_list.append({
                    'basic_event_info': change,
                    'flux_context': self.analyze_flux_context_for_pattern(kg_node, moment),
                    'local_correlations': self.analyze_correlations_around_timestamp(data_col, moment),
                })

        # Remember every detected event, tagged with its variable and day.
        self.dialogue_state['last_detected_events'] = [
            dict(change, variable=kg_node, date=date) for change in sudden_changes
        ]
        print(f"DEBUG - Stored {len(sudden_changes)} detected events in dialogue state.")

        # --- 4. Correlations across the whole slice, then the final result ---
        return {
            'overall_pattern_summary': overall_pattern_summary,
            'notable_events_analysis': notable_event_analysis_list,
            'overall_correlations_for_period': self.analyze_correlations_on_slice(data_col, data_slice),
        }


    def analyze_correlations_on_slice(self, target_variable_col: str, data_slice: pd.DataFrame) -> Dict[str, Any]:
        """
        Correlate one column of an already-sliced DataFrame with every other numeric column.

        Args:
            target_variable_col: Name of the column to correlate against the rest.
            data_slice: The rows to analyse.

        Returns:
            ``{'top_correlations': {...}}`` holding at most the five
            correlations with the largest absolute value, keyed by the
            friendly name obtained via `self.column_to_node` and
            `self.node_to_name` (the column name when no mapping exists).
            ``{'error': ...}`` when the slice is empty or lacks the target
            column; ``{'summary': ...}`` when the target has fewer than two
            non-null values or no correlation could be computed.
        """
        if data_slice.empty or target_variable_col not in data_slice.columns:
            return {'error': 'Invalid data slice or target column for correlation.'}

        reference = data_slice[target_variable_col]
        if reference.notna().sum() < 2:
            return {'summary': 'Not enough valid data for target variable.'}

        # Correlation of the target with each other numeric column; NaN results are dropped.
        scores = {}
        for name in data_slice.columns:
            if name == target_variable_col:
                continue
            candidate = data_slice[name]
            if not pd.api.types.is_numeric_dtype(candidate.dtype):
                continue
            coefficient = reference.corr(candidate)
            if pd.notna(coefficient):
                scores[name] = coefficient

        if not scores:
            return {'summary': 'No significant correlations found.'}

        # Strongest relationships first, regardless of sign.
        ranked = list(scores.items())
        ranked.sort(key=lambda pair: abs(pair[1]), reverse=True)

        top_correlations = {}
        for name, coefficient in ranked[:5]:
            node_id = self.column_to_node.get(name, name)
            label = self.node_to_name.get(node_id, name)
            top_correlations[label] = coefficient

        return {'top_correlations': top_correlations}

    def analyze_flux_context_for_pattern(self, variable_code: str, target_datetime: datetime) -> Dict[str, Any]:
        """
        Collect the non-negligible fluxes feeding a base state variable at a given moment.

        The row of `self.data` nearest to `target_datetime` is read. For every
        incoming edge of `variable_code` in `self.kg.G`, the source node's
        data column is considered a flux when its name contains 'phi_' or
        '_flow' (case-insensitive). Values that are missing or whose
        magnitude does not exceed 1e-6 are ignored.

        Args:
            variable_code: KG node of the state variable; must be in `self.base_state_variables`.
            target_datetime: Moment of interest.

        Returns:
            ``{'positive_fluxes': {...}, 'negative_fluxes': {...}}`` keyed by
            the source node's friendly name (node id when unnamed), or
            ``{'error': ...}`` when the variable is not a base state variable
            or the data row cannot be retrieved.
        """
        if variable_code not in self.base_state_variables:
            return {'error': f"'{variable_code}' is not a base state variable; flux analysis is not applicable."}

        try:
            position = self.data.index.get_indexer([target_datetime], method='nearest')[0]
            snapshot = self.data.iloc[position]  # one row, as a pandas Series
        except Exception as e:
            return {'error': f"Could not retrieve data for flux analysis at {target_datetime}: {e}"}

        result = {'positive_fluxes': {}, 'negative_fluxes': {}}
        for upstream, _target, _attrs in self.kg.G.in_edges(variable_code, data=True):
            column = self.node_to_column.get(upstream)
            if not column:
                continue

            lowered = column.lower()
            if 'phi_' not in lowered and '_flow' not in lowered:
                continue

            # Series.get yields None for a column absent from the data.
            magnitude = snapshot.get(column)
            if not (pd.notna(magnitude) and abs(magnitude) > 1e-6):
                continue

            bucket = 'positive_fluxes' if magnitude > 0 else 'negative_fluxes'
            result[bucket][self.node_to_name.get(upstream, upstream)] = magnitude

        return result

    def format_pattern_context_for_llm(self, analysis_results: Dict[str, Any]) -> str:
        """
        Render the structured result of a pattern analysis as a text block for the LLM.

        Args:
            analysis_results: Dict with the optional keys
                ``'overall_pattern_summary'``, ``'notable_events_analysis'`` and
                ``'overall_correlations_for_period'``, or a dict carrying an
                ``'error'`` key.

        Returns:
            A newline-joined report with a summary section, one paragraph per
            notable event and the (up to four) strongest overall correlations.
            A single error sentence is returned instead when ``analysis_results``
            is not a dict or contains ``'error'``.
        """
        def fmt(value: Any, spec: str = ".2f") -> str:
            """Format numbers with ``spec``; fall back to ``str`` for anything else."""
            try:
                return format(value, spec)
            except (TypeError, ValueError):
                return str(value)

        def usable_correlation(entry: Any) -> Optional[float]:
            """Return the entry's 'max_lag_correlation' as a float, or None if unusable."""
            if not isinstance(entry, dict):
                return None
            try:
                value = float(entry.get('max_lag_correlation'))
            except (TypeError, ValueError):
                return None
            return None if pd.isna(value) else value

        # --- 0. Initial Validation ---
        # The non-dict case must be handled before any `.get` call is attempted.
        if not isinstance(analysis_results, dict):
            return "An error occurred during pattern analysis: Unknown error"
        if 'error' in analysis_results:
            return f"An error occurred during pattern analysis: {analysis_results['error']}"

        # --- 1. Safely Extract Top-Level Data Structures ---
        summary = analysis_results.get('overall_pattern_summary') or {}
        events = analysis_results.get('notable_events_analysis') or []
        overall_corrs = analysis_results.get('overall_correlations_for_period') or {}

        context_parts = []

        # --- 2. Overall Summary Section ---
        var_name = summary.get('variable_name', 'the variable')
        start, end = summary.get('period_start', 'N/A'), summary.get('period_end', 'N/A')
        context_parts.append(f"--- Analysis Summary for '{var_name}' between {start} and {end} ---")

        mean_val = summary.get('mean_value')
        min_val, max_val = summary.get('min_value'), summary.get('max_value')
        if all(v is not None for v in (mean_val, min_val, max_val)):
            context_parts.append(
                f"Over this period, the value averaged {fmt(mean_val)}, "
                f"ranging from a minimum of {fmt(min_val)} to a maximum of {fmt(max_val)}."
            )
        context_parts.append("")

        # --- 3. Detailed Events Section ---
        if events:
            context_parts.append("--- Detailed Analysis of Notable Events ---")
            for event_number, event_analysis in enumerate(events, start=1):
                if not isinstance(event_analysis, dict):
                    continue
                basic_info = event_analysis.get('basic_event_info') or {}
                ts = basic_info.get('timestamp', 'Unknown Time')
                change_type = str(basic_info.get('change_type') or 'change').replace('_', ' ')
                magnitude = basic_info.get('change_magnitude')
                if magnitude is None:
                    magnitude = 0

                context_parts.append(
                    f"\nEvent #{event_number} at ~{ts}: A {change_type} of {fmt(magnitude)} was detected."
                )

                flux_context = event_analysis.get('flux_context') or {}
                if isinstance(flux_context, dict):
                    significant = flux_context.get('significant_changes')
                    if significant:
                        context_parts.append("  Potential Cause (based on Flux Analysis):")
                        for change in significant:
                            description = change.get('description', 'N/A') if isinstance(change, dict) else change
                            context_parts.append(f"    - {description}")

                    # analyze_flux_context_for_pattern reports its findings under
                    # these two keys; surface them so that output is not dropped.
                    for key, label in (('positive_fluxes', 'Positive'), ('negative_fluxes', 'Negative')):
                        fluxes = flux_context.get(key)
                        if isinstance(fluxes, dict) and fluxes:
                            listing = ", ".join(
                                f"{name} ({fmt(value, '.4g')})" for name, value in fluxes.items()
                            )
                            context_parts.append(f"  Active {label} Fluxes at Event Time: {listing}")

                local_corrs = event_analysis.get('local_correlations') or {}
                if isinstance(local_corrs, dict) and local_corrs.get('top_correlations'):
                    corr_strings = [
                        f"{name} ({fmt(val)})" for name, val in local_corrs['top_correlations'].items()
                    ]
                    if corr_strings:
                        context_parts.append(f"  Locally Correlated With: {', '.join(corr_strings)}")
        else:
            context_parts.append("No specific sudden changes or notable events were detected in this time range.")

        # --- 4. Overall Correlations Section ---
        context_parts.append("\n--- Overall Correlations for the Entire Period ---")
        if overall_corrs and isinstance(overall_corrs, dict):
            # Accept both the per-variable mapping and the full result of
            # analyze_correlations, which nests that mapping under 'lag_analysis'.
            nested = overall_corrs.get('lag_analysis')
            if isinstance(nested, dict):
                overall_corrs = nested

            # Discard unusable entries (missing, non-numeric, NaN) before ranking
            # so they can neither disturb the sort nor be printed as "nan".
            scored = []
            for col_name, entry in overall_corrs.items():
                corr_value = usable_correlation(entry)
                if corr_value is not None:
                    scored.append((col_name, corr_value))
            scored.sort(key=lambda pair: abs(pair[1]), reverse=True)

            top_corr_strings = []
            for col_name, corr_value in scored[:4]:
                node_id = self.column_to_node.get(col_name, col_name)
                friendly_name = self.node_to_name.get(node_id, col_name)
                top_corr_strings.append(f"{friendly_name} ({fmt(corr_value)})")

            if top_corr_strings:
                context_parts.append(
                    "The strongest relationships were found with: " + ", ".join(top_corr_strings) + "."
                )
            else:
                context_parts.append("No significant correlations were found with other variables in this period.")
        else:
            context_parts.append("Overall correlation analysis was not performed or yielded no results.")

        # --- 5. Combine all parts into a final string ---
        return "\n".join(context_parts)

    

    def detect_patterns(self, variable: str, start_time=None, end_time=None, date=None) -> Dict[str, Any]:
        """
        Summarise one variable over a single day (optionally a time window within
        it) and flag unusually large step changes.

        Args:
            variable: Knowledge-graph node name. ``"<variable>_ref"`` and a raw
                column name of ``self.data`` are tried as fallbacks.
            start_time: Optional window start as an ``"HH:MM"`` string; defaults
                to the start of the day.
            end_time: Optional window end as an ``"HH:MM"`` string; defaults to
                the end of the day. 4 min 59 s are added to a given end time so
                that samples stamped inside that final interval are included.
            date: Required. Passed to ``datetime.combine`` as the date part.

        Returns:
            On success a dict with the keys ``variable``, ``column``,
            ``data_points``, ``mean``, ``std``, ``min``, ``max``,
            ``start_value``, ``end_value``, ``change``, ``percent_change`` and
            ``sudden_changes`` (a list of dicts with ``index``, ``timestamp``,
            ``value_before``, ``value_after``, ``change_magnitude`` and
            ``signed_change``). ``error_sudden_change`` is added when the
            sudden-change step failed. A sample counts as a sudden change when
            the absolute difference to its predecessor exceeds twice the
            standard deviation of all absolute differences in the window.

            On failure ``{"error": <message>}``.
        """
        # Bind the datetime classes locally so the method does not depend on how
        # the module-level names `datetime` / `time` happen to be bound.
        from datetime import datetime, time, timedelta

        print(f"Input parameters: variable={variable}, start_time={start_time}, end_time={end_time}, date={date}")

        # --- Resolve the data column ---
        column = self.node_to_column.get(variable)
        if column is None:
            column = self.node_to_column.get(f"{variable}_ref")
        if column is None:
            if hasattr(self, 'data') and variable in self.data.columns:
                column = variable
            else:
                return {"error": f"Could not find data column for variable '{variable}'"}
        print(f"DEBUG - detect_patterns: Resolved column name: '{column}'")

        # --- Pre-checks ---
        if not hasattr(self, 'data') or self.data.empty:
            print("ERROR - detect_patterns: self.data is empty or missing.")
            return {"error": "Dataframe is empty. Cannot detect patterns."}

        if not isinstance(self.data.index, pd.DatetimeIndex):
            print("ERROR - detect_patterns: self.data does not have a DatetimeIndex.")
            return {"error": "Dataframe must have a DatetimeIndex for time-based filtering."}

        if not date:
            print("ERROR - detect_patterns: Date is required for pattern analysis.")
            return {"error": "Date is required for pattern analysis."}

        def parse_hhmm(text):
            """Parse 'HH:MM' into a time object; raises ValueError on bad input."""
            hour, minute = map(int, text.split(':'))
            return time(hour, minute)

        # --- Build the window and slice the data ---
        try:
            # Only the parsing step is reported as a format problem; a ValueError
            # raised by the slice itself must not be mislabelled as one.
            try:
                start_clock = parse_hhmm(start_time) if start_time else datetime.min.time()
                end_clock = parse_hhmm(end_time) if end_time else None
            except ValueError:
                print("Error parsing start or end time. Please use HH:MM format.")
                return {"error": "Invalid time format"}

            start_dt = datetime.combine(date, start_clock)
            if end_clock is not None:
                # Extend into the interval that starts at HH:MM (see docstring).
                end_dt = datetime.combine(date, end_clock) + timedelta(minutes=4, seconds=59)
            else:
                end_dt = datetime.combine(date, datetime.max.time())

            print(f"DEBUG - detect_patterns: Filtering data using DatetimeIndex from {start_dt} to {end_dt}")
            period_data = self.data.loc[start_dt:end_dt].copy()
            print(f"DEBUG - detect_patterns: After DatetimeIndex filtering: {len(period_data)} rows. Index range: {period_data.index.min()} to {period_data.index.max()}")
        except Exception as e:
            print(f"ERROR - detect_patterns: Unexpected error during DatetimeIndex filtering: {e}")
            traceback.print_exc()
            return {"error": f"An unexpected error occurred during data filtering: {e}"}

        # --- Empty window: explain why ---
        if period_data.empty:
            window = f"{start_dt.strftime('%Y-%m-%d %H:%M')} to {end_dt.strftime('%Y-%m-%d %H:%M')}"
            data_min, data_max = self.data.index.min(), self.data.index.max()
            if start_dt > data_max or end_dt < data_min:
                return {"error": f"Specified period ({window}) is outside the data's date range ({data_min.strftime('%Y-%m-%d %H:%M')} to {data_max.strftime('%Y-%m-%d %H:%M')})."}
            # Range overlaps the data, but no sample falls inside it.
            return {"error": f"No data points found within the specified period ({window}). There might be gaps or timestamp misalignment."}

        # --- Basic statistics ---
        print(f"DEBUG - detect_patterns: Calculating patterns for '{column}' on {len(period_data)} data points.")
        try:
            series = period_data[column]
            patterns = {
                'variable': variable,
                'column': column,
                'data_points': len(period_data),
                'mean': series.mean(),
                'std': series.std(),
                'min': series.min(),
                'max': series.max(),
                'start_value': series.iloc[0],
                'end_value': series.iloc[-1],
            }
            start_val, end_val = patterns['start_value'], patterns['end_value']
            if len(period_data) <= 1 or start_val is None or end_val is None:
                patterns['change'] = 0
                patterns['percent_change'] = 0
            else:
                patterns['change'] = end_val - start_val
                if start_val != 0:
                    patterns['percent_change'] = ((end_val / start_val) - 1) * 100
                elif end_val != 0:
                    # Relative change from zero is unbounded; keep its sign.
                    patterns['percent_change'] = float('-inf') if end_val < 0 else float('inf')
                else:
                    patterns['percent_change'] = 0
        except KeyError:
            print(f"ERROR - detect_patterns: Column '{column}' not found in period_data after filtering.")
            return {"error": f"Internal error: Column '{column}' disappeared after filtering."}
        except Exception as e:
            print(f"ERROR - detect_patterns: Error during basic pattern calculation: {e}")
            traceback.print_exc()
            return {"error": f"Error calculating basic patterns: {e}"}

        # --- Sudden changes ---
        print(f"DEBUG - detect_patterns: Detecting sudden changes for '{column}'.")
        sudden_changes = []
        try:
            if len(period_data) > 1:
                raw_diffs = series.diff()
                abs_diffs = raw_diffs.abs()
                valid_abs_diffs = abs_diffs.dropna()
                if valid_abs_diffs.empty:
                    print("DEBUG - detect_patterns: No valid differences found to calculate standard deviation.")
                else:
                    std_diff = valid_abs_diffs.std()
                    if std_diff > 0:
                        threshold = 2 * std_diff
                        # Work with row positions rather than index labels: label
                        # lookups return several rows when a timestamp is
                        # duplicated, which previously made such events vanish.
                        flagged = (abs_diffs > threshold).tolist()
                        for pos, is_sudden in enumerate(flagged):
                            if not is_sudden:
                                continue
                            original_index = period_data.index[pos]
                            if isinstance(original_index, pd.Timestamp):
                                time_str = original_index.strftime('%H:%M')
                            else:
                                time_str = str(original_index)
                            sudden_changes.append({
                                'index': original_index,
                                'timestamp': time_str,
                                'value_before': series.iloc[pos - 1] if pos > 0 else None,
                                'value_after': series.iloc[pos],
                                'change_magnitude': abs_diffs.iloc[pos],  # absolute size
                                'signed_change': raw_diffs.iloc[pos],     # keeps direction
                            })
                    else:
                        print("DEBUG - detect_patterns: Standard deviation of differences is zero. No sudden changes detected.")
        except Exception as e:
            # Best effort: the basic statistics are still returned, together with
            # whatever events were collected before the failure.
            print(f"ERROR - detect_patterns: Error during sudden change detection: {e}")
            traceback.print_exc()
            patterns['error_sudden_change'] = f"Error during sudden change detection: {e}"

        patterns['sudden_changes'] = sudden_changes
        print(f"DEBUG - detect_patterns: Found {len(sudden_changes)} sudden changes.")
        return patterns

    def analyze_correlations(self, target_var: str, related_vars: Optional[List[str]] = None,
                             lag_range=range(0, 12),
                             time_period=None) -> Dict[str, Any]:
        """
        Correlate a target variable with related variables at several time lags.

        For every related variable and every ``lag`` in ``lag_range`` the target
        column is correlated with the related column shifted by ``lag`` rows.

        Args:
            target_var: Node name of the target; ``"<target_var>_ref"`` is tried
                as a fallback when resolving its data column.
            related_vars: Node names to compare against. When omitted, the
                predecessors of the target in the knowledge graph are used,
                control actions first.
            lag_range: Iterable of integer lags (in rows).
            time_period: Currently ignored; the full dataset is always used.

        Returns:
            ``{'target_variable', 'target_column', 'lag_analysis'}`` where
            ``lag_analysis`` maps each usable related variable to ``column``,
            ``lag_correlations`` (lag -> correlation), ``max_lag``,
            ``max_lag_correlation`` and ``direction``. When no finite
            correlation exists for a variable these are ``0``, ``0`` and
            ``'neutral'``. ``{'error': ...}`` is returned when the target column
            or the related variables cannot be determined.
        """
        print(f"DEBUG - analyze_correlations: START for target '{target_var}'. Object ID: {id(self)}")
        print("DEBUG - analyze_correlations: self.data columns AT START:", self.data.columns.tolist() if hasattr(self, 'data') else "self.data attribute not found.")
        print("DEBUG - analyze_correlations: self.data shape AT START:", self.data.shape if hasattr(self, 'data') and not self.data.empty else "self.data is empty or missing.")

        target_col = self.node_to_column.get(target_var) or self.node_to_column.get(f"{target_var}_ref")
        if not target_col or target_col not in self.data.columns:
            return {"error": f"Target variable {target_var} not found in dataset"}

        if not related_vars:
            # Fall back to the factors pointing at the target in the knowledge
            # graph, listing control actions before everything else.
            related_vars = []
            target_node = target_var if self.kg.G.has_node(target_var) else f"{target_var}_ref"
            if self.kg.G.has_node(target_node):
                potential_related = list(self.kg.G.predecessors(target_node))
                control_actions = [var for var in potential_related if self.kg.is_control_action(var)]
                other_related = [var for var in potential_related if not self.kg.is_control_action(var)]
                related_vars = control_actions + other_related

        if not related_vars:
            return {"error": "No related variables found in dataset"}

        # NOTE: time_period filtering is not implemented; all rows are used.
        data_filtered = self.data
        target_series = data_filtered[target_col]
        lags = list(lag_range)

        results = {
            'target_variable': target_var,
            'target_column': target_col,
            'lag_analysis': {}
        }

        for related_var in related_vars:
            related_col = self.node_to_column.get(related_var) or self.node_to_column.get(f"{related_var}_ref")
            if not related_col or related_col not in self.data.columns:
                print(f"Warning: Related variable {related_var} (column {related_col}) not found in dataset.")
                continue

            related_series = data_filtered[related_col]
            # Every lag is computed the same way (shift(0) is the unshifted
            # series), so each correlation is stored under the lag it belongs to
            # even when lag_range does not start at 0.
            lag_corrs = [target_series.corr(related_series.shift(lag)) for lag in lags]

            # Rank only finite values: an undefined (NaN) correlation must not be
            # reported as the strongest one.
            finite = [(abs(corr), pos) for pos, corr in enumerate(lag_corrs) if pd.notna(corr)]
            if finite:
                best_pos = max(finite, key=lambda pair: pair[0])[1]
                max_lag = lags[best_pos]
                max_lag_correlation = lag_corrs[best_pos]
                if max_lag_correlation > 0:
                    direction = 'positive'
                elif max_lag_correlation < 0:
                    direction = 'negative'
                else:
                    direction = 'neutral'
            else:
                max_lag = 0
                max_lag_correlation = 0
                direction = 'neutral'

            results['lag_analysis'][related_var] = {
                'column': related_col,
                'lag_correlations': dict(zip(lags, lag_corrs)),
                'max_lag': max_lag,
                'max_lag_correlation': max_lag_correlation,
                'direction': direction
            }

        return results

    def get_pcmci_node_for_control(self, control_node: str) -> str:
        """
        Map a control-related node name to the node name used for PCMCI lookups.

        Names found in ``self.ref_node_to_base_code`` (e.g. a reference node such
        as 'uV_ref') are translated to their mapped base code (e.g. 'uV'); every
        other name is handed back unchanged and left to the caller's lookup.

        Args:
            control_node: Node name to translate.

        Returns:
            The mapped base code, or ``control_node`` itself when no mapping exists.
        """
        base_codes = self.ref_node_to_base_code
        return base_codes[control_node] if control_node in base_codes else control_node
    
    def precompute_pcmci(self, data_file_path: str, cache_file_path: str = "./pcmci_cache.pkl"):
        """
        Load PCMCI results from an on-disk cache, or compute and cache them.

        The cache is accepted when the fingerprint stored with it (absolute
        path, size and modification time of ``data_file_path``) equals the
        fingerprint of the current file and the cached payload exposes
        ``'var_names'``. Otherwise ``self.run_pcmci(tau_max=48, alpha=0.05)`` is
        called and, if it produced results, they are pickled to
        ``cache_file_path`` together with the current fingerprint.

        Args:
            data_file_path: Path to the source data file; used only to build the
                fingerprint that validates the cache.
            cache_file_path: Where the pickle cache is read from / written to.

        Side effects:
            Sets ``self.pcmci_results`` (``None`` when the data file is missing
            or unreadable) and, on a cache hit, ``self.pcmci_var_to_idx``.
        """
        # 1. The source data file must exist.
        if not os.path.exists(data_file_path):
            print(f"Error: Source data file not found at '{data_file_path}'. Cannot precompute PCMCI.")
            self.pcmci_results = None
            return

        # 2. Fingerprint the data file so a changed file invalidates the cache.
        try:
            current_fingerprint = {
                'path': os.path.abspath(data_file_path),
                'size': os.path.getsize(data_file_path),
                'modified_time': os.path.getmtime(data_file_path)
            }
        except OSError as e:
            print(f"Error: Could not access metadata for data file: {e}. Aborting PCMCI precomputation.")
            self.pcmci_results = None
            return

        # 3. Try the cache.
        if os.path.exists(cache_file_path):
            try:
                print(f"INFO: Found cache file at '{cache_file_path}'. Validating...")
                # NOTE(security): unpickling can execute arbitrary code; the cache
                # file must only ever come from a trusted location.
                with open(cache_file_path, 'rb') as f:
                    cached_data = pickle.load(f)

                if cached_data.get('fingerprint') != current_fingerprint:
                    print("INFO: Cache is stale (source data has changed). Re-running PCMCI.")
                else:
                    # Validate the payload fully before touching instance state,
                    # so a malformed cache never leaves half-initialised results.
                    cached_results = cached_data.get('results')
                    var_to_idx = {name: idx for idx, name in enumerate(cached_results['var_names'])}

                    print("INFO: Cache is valid. Loading PCMCI results from file.")
                    self.pcmci_results = cached_results
                    self.pcmci_var_to_idx = var_to_idx
                    # NOTE(review): run_pcmci also adds 'causal_PCMCI' edges to the
                    # knowledge graph; that step is not repeated on a cache hit.
                    print("PCMCI precomputation complete (loaded from cache).")
                    return
            except (pickle.UnpicklingError, EOFError, KeyError, AttributeError,
                    ImportError, IndexError, TypeError, ValueError, OSError) as e:
                # The cache is best effort: any unreadable or malformed file just
                # triggers a fresh computation.
                print(f"Warning: Cache file '{cache_file_path}' is corrupted or invalid: {e}. Re-running PCMCI.")

        # 4. No usable cache: run the full computation.
        print("INFO: Running full PCMCI computation. This may take some time...")
        self.run_pcmci(tau_max=48, alpha=0.05)

        fresh_results = getattr(self, 'pcmci_results', None)
        if not fresh_results:
            print("PCMCI precomputation finished without results; nothing was cached.")
            return

        # 5. Persist the new results.
        data_to_cache = {
            'fingerprint': current_fingerprint,
            'results': fresh_results
        }
        try:
            with open(cache_file_path, 'wb') as f:
                pickle.dump(data_to_cache, f)
            print(f"INFO: New PCMCI results saved to cache at '{cache_file_path}'.")
        except (OSError, pickle.PicklingError, TypeError, AttributeError) as e:
            print(f"Error: Could not save PCMCI cache to '{cache_file_path}': {e}")

        print("PCMCI precomputation complete (ran full computation).")


    def run_pcmci(self, tau_max=48, alpha=0.05, pc_alpha=None, seed=None, independence_test='ParCorr'):
        """
        Run PCMCI on the variables shared by the knowledge graph and the data,
        store the results, and add the discovered lagged links to the graph.

        Args:
            tau_max: Largest time lag (in rows) passed to PCMCI.
            alpha: Significance level, passed to PCMCI as ``alpha_level``.
            pc_alpha: Passed through to PCMCI as ``pc_alpha``.
            seed: Accepted for interface compatibility; not used by this method.
            independence_test: One of ``'ParCorr'``, ``'GPDC'`` or ``'CMIknn'``.

        Raises:
            ValueError: If ``independence_test`` is not one of the names above.

        Side effects:
            Sets ``self.pcmci_results`` (``None`` when PCMCI cannot run) and
            ``self.pcmci_var_to_idx``; adds edges with
            ``relationship='causal_PCMCI'`` to ``self.kg.G`` for links that are
            not already present.
        """
        if not hasattr(self, 'data') or self.data.empty:
            print("Error: No data loaded to run PCMCI.")
            self.pcmci_results = None
            return

        # Tigramite is optional; `pp` is None when it could not be imported.
        if pp is None:
            print("Warning: Tigramite not available. Skipping PCMCI computation.")
            print("The system will work without causal analysis capabilities.")
            self.pcmci_results = None
            return

        # Variables that exist both as knowledge-graph nodes and as data columns.
        pcmci_vars = [node for node in self.kg.G.nodes() if self.node_to_column.get(node) in self.data.columns]
        if len(pcmci_vars) < 2:
            print("Warning: Not enough overlapping variables to run PCMCI.")
            self.pcmci_results = None
            return
        print(f"DEBUG: Running PCMCI on {len(pcmci_vars)} variables.")

        relevant_columns = [self.node_to_column[node] for node in pcmci_vars]
        pcmci_data = self.data[relevant_columns].copy()

        # --- NaN handling: drop every row that has a missing value ---
        print(f"DEBUG: Checking for NaN values in PCMCI data...")
        nan_counts = pcmci_data.isna().sum()
        total_nans = nan_counts.sum()

        if total_nans > 0:
            print(f"DEBUG: Found {total_nans} NaN values across columns:")
            for col, count in nan_counts.items():
                if count > 0:
                    print(f"  {col}: {count} NaNs")

            # NOTE(review): dropping rows makes the remaining samples unevenly
            # spaced, so lags across the gaps are only approximate.
            print(f"DEBUG: Removing rows with NaN values...")
            original_shape = pcmci_data.shape
            pcmci_data = pcmci_data.dropna()
            removed_rows = original_shape[0] - pcmci_data.shape[0]
            print(f"DEBUG: Removed {removed_rows} rows with NaN values. Data shape: {original_shape} -> {pcmci_data.shape}")

            if pcmci_data.empty:
                print("ERROR: All data removed due to NaN values. Cannot run PCMCI.")
                self.pcmci_results = None
                return

        data_for_pcmci = pcmci_data.values
        var_names = pcmci_vars
        tigramite_data = pp.DataFrame(data_for_pcmci, var_names=var_names)

        # Conditional-independence test.
        if independence_test == 'GPDC':
            indtest = GPDC()
        elif independence_test == 'ParCorr':
            indtest = ParCorr(significance='analytic')
        elif independence_test == 'CMIknn':
            indtest = CMIknn()
        else:
            raise ValueError(f"Unknown independence test: {independence_test}")

        pcmci = PCMCI(dataframe=tigramite_data, cond_ind_test=indtest, verbosity=0)
        results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=pc_alpha, alpha_level=alpha)

        # Store the results plus the parameters needed to interpret them later.
        self.pcmci_results = results
        self.pcmci_results['alpha'] = alpha
        self.pcmci_results['var_names'] = var_names
        self.pcmci_results['tau_max'] = tau_max
        self.pcmci_var_to_idx = {name: idx for idx, name in enumerate(var_names)}

        # --- Update the knowledge graph with the discovered lagged links ---
        print("INFO: Updating Knowledge Graph with discovered PCMCI links...")
        causal_graph = results['graph']
        added_edges_count = 0

        # Tigramite returns `graph` with shape (N, N, tau_max + 1), where
        # graph[i, j, tau] == '-->' means variable i at t - tau drives variable j
        # at t. Anything else cannot be interpreted here and is skipped.
        graph_shape = tuple(getattr(causal_graph, 'shape', ()))
        n_vars = len(var_names)
        if len(graph_shape) != 3 or graph_shape[0] < n_vars or graph_shape[1] < n_vars:
            print(f"Warning: PCMCI graph has unexpected shape {graph_shape}; Knowledge Graph was not updated.")
            return

        highest_lag = min(tau_max, graph_shape[2] - 1)
        # Lags ascend, so the edge kept for a pair carries its smallest lag.
        for lag in range(1, highest_lag + 1):
            for i, source_node in enumerate(var_names):
                for j, target_node in enumerate(var_names):
                    if '-->' not in str(causal_graph[i, j, lag]):
                        continue
                    if self.kg.G.has_edge(source_node, target_node):
                        continue
                    self.kg.G.add_edge(
                        source_node,
                        target_node,
                        relationship='causal_PCMCI',
                        lag=lag,
                        # NOTE(review): assumes 5-minute sampling - confirm.
                        description=f'PCMCI suggests {source_node} causes {target_node} with a lag of {lag*5} minutes.'
                    )
                    added_edges_count += 1

        print(f"INFO: Added {added_edges_count} new causal edges to the Knowledge Graph from PCMCI results.")

    def enhance_event_analysis(self, variable: str, sudden_change_time: str,
                               date=None) -> Dict[str, Any]:
        """Explore KG neighborhood and preceding events around a sudden change.

        Resolves ``variable`` to a data column, locates the data row at (or,
        when there is no exact sample, nearest to) ``sudden_change_time`` on
        ``date``, and gathers context for that row: neighbouring KG nodes, the
        node's direct incoming/outgoing edges, the values of neighbouring
        variables in the row immediately before the event, and -- when the
        instance provides ``analyze_correlations_around_timestamp`` -- local
        correlations in a 15-minute window.

        Args:
            variable: KG node name of the variable that changed. Mapped to a
                column through ``self.node_to_column`` (also trying
                ``"<variable>_ref"``), falling back to a column literally named
                ``variable``.
            sudden_change_time: Time of the change as an ``"HH:MM"`` string.
            date: Date of the change; combined with ``sudden_change_time`` via
                ``datetime.combine``. Required despite the ``None`` default.

        Returns:
            On success, a dict with the keys ``event_variable_node``,
            ``event_timestamp_str``, ``event_date``, ``actual_event_timestamp``,
            ``data_column_used``, ``current_value``, ``kg_neighborhood_nodes``,
            ``kg_direct_relationships``, ``preceding_values_of_related`` and
            ``local_correlations``. Mapping, data and lookup failures are
            reported as ``{'error': <message>}`` instead.
        """
        # The file imports both the `datetime` module and the `datetime` class
        # under the same name; bind the class explicitly for strptime/combine.
        from datetime import datetime

        def _locate_row(index, target_ts):
            """Return the position of the row at, or nearest to, ``target_ts`` (None if undeterminable)."""
            try:
                loc = index.get_loc(target_ts)
            except KeyError:
                # No exact sample: pick the row with the smallest absolute time gap.
                # Done by hand because Index.get_loc no longer accepts method='nearest'
                # and get_indexer(method=...) rejects duplicated / unsorted indexes.
                try:
                    nearest = int(abs(index - target_ts).argmin())
                except (TypeError, ValueError):
                    # e.g. tz-aware index vs. naive target, or nothing comparable.
                    return None
                return nearest if nearest >= 0 else None
            if isinstance(loc, (int, np.integer)):
                return int(loc)
            # Duplicated timestamps make get_loc return a slice or a boolean mask;
            # translate either into positions and keep the first match.
            matches = np.arange(len(index))[loc]
            return int(matches[0]) if len(matches) else None

        print(f"DEBUG - enhance_event_analysis: START. Analyzing sudden change for {variable} at {sudden_change_time} on {date}")
        print(f"DEBUG - enhance_event_analysis: Object ID: {id(self)}") # Print ID of the instance
        data = getattr(self, 'data', None)
        print("DEBUG - enhance_event_analysis: self.data columns at function start:", data.columns.tolist() if data is not None else "self.data attribute not found.")
        print("DEBUG - enhance_event_analysis: self.data shape at function start:", data.shape if data is not None and not data.empty else "self.data is empty or missing.")

        # --- Resolve the data column for the requested KG node ---
        node_to_column = getattr(self, 'node_to_column', None)
        if node_to_column is None:
            print("ERROR - enhance_event_analysis: self.node_to_column mapping is not initialized.")
            return {'error': "Internal error: Node to column mapping is missing."}

        column = node_to_column.get(variable) or node_to_column.get(f"{variable}_ref")
        if not column:
            print(f"DEBUG - enhance_event_analysis: Could not map variable '{variable}' to column using self.node_to_column.get('{variable}') or .get('{variable}_ref').")
            # Fallback (less robust): the variable may itself be a column name.
            if data is not None and variable in data.columns:
                column = variable
                print(f"DEBUG - enhance_event_analysis: Falling back, '{variable}' found directly as column name.")
            else:
                print(f"DEBUG - enhance_event_analysis: '{variable}' not found directly as a column name either.")
                return {'error': f"Could not find data column for variable '{variable}'"}

        print(f"DEBUG - enhance_event_analysis: Resolved column name: '{column}'")

        # --- Validate inputs needed for the time-based lookup ---
        if data is None or data.empty:
            print("ERROR - enhance_event_analysis: self.data is empty or missing before data lookup.")
            return {'error': "Dataframe is empty. Cannot analyze context."}

        if not isinstance(data.index, pd.DatetimeIndex):
            print("ERROR - enhance_event_analysis: self.data does not have a DatetimeIndex.")
            return {"error": "Dataframe must have a DatetimeIndex for time-based lookup."}

        if date is None:
            print("ERROR - enhance_event_analysis: Date input is None.")
            return {"error": "Analysis requires a specific date."}

        current_data_series = None
        preceding_data_series = None
        actual_event_timestamp = None  # Timestamp of the data row actually used

        # --- Data lookup: event row and the row just before it ---
        try:
            # Only a malformed "HH:MM" string is a parse error; keep that handler
            # narrow so unrelated ValueErrors are not misreported.
            try:
                target_time_obj = datetime.strptime(sudden_change_time, '%H:%M').time()
            except ValueError:
                print(f"DEBUG - enhance_event_analysis: ValueError parsing time {sudden_change_time}.")
                return {'error': "Error parsing sudden change time."}

            target_datetime_obj = datetime.combine(date, target_time_obj)
            print(f"DEBUG - enhance_event_analysis: Target Datetime (parsed): {target_datetime_obj}")
            target_ts = pd.Timestamp(target_datetime_obj)

            event_pos = _locate_row(data.index, target_ts)
            if event_pos is None:
                print(f"ERROR - enhance_event_analysis: Could not locate a row at or near {target_datetime_obj}. Index might be empty or date outside range.")
                return {"error": f"Could not find data point near time {sudden_change_time} on {date}. Data might be empty or outside date range."}

            # Positional access always yields exactly one row, even when the
            # timestamp label is duplicated in the index.
            current_data_series = data.iloc[event_pos]
            actual_event_timestamp = data.index[event_pos]
            print(f"DEBUG - enhance_event_analysis: Retrieved data using .iloc[{event_pos}]. Actual timestamp: {actual_event_timestamp}.")
            if actual_event_timestamp != target_ts:
                print(f"Warning: No exact sample at {target_datetime_obj}; using nearest row at {actual_event_timestamp} (gap: {abs(actual_event_timestamp - target_ts)}).")

            if not isinstance(current_data_series, pd.Series) or current_data_series.empty:
                print(f"ERROR - enhance_event_analysis: Data series lookup resulted in unexpected type or empty series for timestamp {target_datetime_obj}.")
                print(f"DEBUG - enhance_event_analysis: Retrieved object type: {type(current_data_series)}")
                return {"error": f"Failed to retrieve data series for the event timestamp {sudden_change_time} on {date}."}

            print(f"DEBUG - enhance_event_analysis: Successfully retrieved data series for actual event timestamp: {actual_event_timestamp}.")

            if event_pos > 0:
                previous_index_label = data.index[event_pos - 1]
                print(f"DEBUG - enhance_event_analysis: Previous index label (using integer position): {previous_index_label}")
                preceding_data_series = data.iloc[event_pos - 1]
                print(f"DEBUG - enhance_event_analysis: Successfully retrieved preceding data series for index label {previous_index_label}.")
            else:
                print(f"DEBUG - enhance_event_analysis: Event timestamp {actual_event_timestamp} is the first point in the DataFrame, no preceding data.")

        except Exception as e:
            # Boundary handler: any other failure becomes an error result.
            print(f"ERROR - enhance_event_analysis: Unexpected error during primary data lookup: {e}")
            traceback.print_exc()
            return {'error': f"An unexpected error occurred during data retrieval: {e}"}

        # --- KG exploration (uses the node name, not the column name) ---
        kg_relationships: Dict[str, List[Dict[str, Any]]] = {'incoming_edges': [], 'outgoing_edges': []}
        preceding_values: Dict[str, Any] = {}
        related_nodes_list: List[str] = []

        graph = self.kg.G
        if variable and graph and graph.has_node(variable):
            print(f"DEBUG - enhance_event_analysis: Exploring KG neighborhood for node '{variable}'.")
            related_nodes_list = list(self.kg.get_related_variables(variable, depth=1))
            print(f"DEBUG - enhance_event_analysis: Found related nodes: {related_nodes_list}")

            if graph.is_directed():
                # On a directed graph edges(nbunch) only yields out-edges, so the
                # incoming side has to be requested explicitly.
                incoming = list(graph.in_edges([variable], data=True))
                # A self-loop is already reported as incoming; do not list it twice.
                outgoing = [edge for edge in graph.out_edges([variable], data=True) if edge[1] != variable]
            else:
                adjacent = list(graph.edges(nbunch=[variable], data=True))
                incoming = [edge for edge in adjacent if edge[1] == variable]
                outgoing = [edge for edge in adjacent if edge[1] != variable]

            for source, _, attrs in incoming:
                kg_relationships['incoming_edges'].append({
                    'source_node': source,
                    'source_name': self.node_to_name.get(source, source),
                    'relationship': attrs.get('relationship'),
                    'description': attrs.get('description')
                })
            for _, target, attrs in outgoing:
                kg_relationships['outgoing_edges'].append({
                    'target_node': target,
                    'target_name': self.node_to_name.get(target, target),
                    'relationship': attrs.get('relationship'),
                    'description': attrs.get('description')
                })

            # Values of the related nodes in the row just before the event.
            if preceding_data_series is not None:
                print(f"DEBUG - enhance_event_analysis: Retrieving preceding values for related nodes.")
                for node in related_nodes_list:
                    col_for_related_node = node_to_column.get(node)
                    if col_for_related_node is None:
                        col_for_related_node = node_to_column.get(f"{node}_ref")

                    if col_for_related_node and col_for_related_node in preceding_data_series.index:
                        preceding_values[node] = preceding_data_series[col_for_related_node]
                        print(f"DEBUG - enhance_event_analysis: Found preceding value for '{node}' using column '{col_for_related_node}': {preceding_values[node]}")
                    elif node in preceding_data_series.index:
                        # Last resort: the node name is itself a column name.
                        preceding_values[node] = preceding_data_series[node]
                        print(f"DEBUG - enhance_event_analysis: Found preceding value for '{node}' using node name directly as column: {preceding_values[node]}")
                    else:
                        print(f"DEBUG - enhance_event_analysis: Could not find column for related node '{node}' ({col_for_related_node}) in preceding data series.")
            else:
                print("DEBUG - enhance_event_analysis: No preceding data series available to retrieve related values.")

        elif not variable:
            print(f"DEBUG - enhance_event_analysis: Variable input '{variable}' did not map to a KG node name.")
        else:
            print(f"DEBUG - enhance_event_analysis: KG node '{variable}' not found in graph.")

        # --- Optional local correlation analysis ---
        local_correlations: Dict[str, Any] = {}
        if hasattr(self, 'analyze_correlations_around_timestamp'):
            try:
                # Reuse the column resolved above so the direct-column fallback
                # is honoured here as well.
                print(f"DEBUG - enhance_event_analysis: Performing local correlation analysis for column '{column}' around {actual_event_timestamp}.")
                local_correlations = self.analyze_correlations_around_timestamp(
                    target_variable=column,
                    timestamp=pd.Timestamp(actual_event_timestamp),
                    window_minutes=15
                )
                if isinstance(local_correlations, dict) and 'error' in local_correlations:
                    print(f"Warning: Local correlation analysis returned error: {local_correlations['error']}")
                else:
                    print(f"DEBUG - enhance_event_analysis: Local correlations result: {local_correlations}")
            except Exception as e:
                # Best effort: a failed correlation pass must not discard the rest.
                print(f"Error during local correlation analysis in enhance_event_analysis: {e}")
                traceback.print_exc()

        # --- Compile structured result ---
        analysis_result: Dict[str, Any] = {
            'event_variable_node': variable, # The variable node name (e.g., 'T')
            'event_timestamp_str': sudden_change_time, # The HH:MM string as given
            'event_date': date, # The date as given
            'actual_event_timestamp': actual_event_timestamp, # Timestamp of the row used
            'data_column_used': column, # The data column name used for the variable
            'current_value': current_data_series.get(column) if column in current_data_series.index else None,
            'kg_neighborhood_nodes': related_nodes_list, # List of related nodes
            'kg_direct_relationships': kg_relationships, # Structured incoming/outgoing edges
            'preceding_values_of_related': preceding_values, # Dict of {node: value}
            'local_correlations': local_correlations
        }

        print(f"DEBUG - enhance_event_analysis: END. Returning analysis_result dictionary.")
        return analysis_result

    def analyze_causal_anomaly(self, primary_node: str, timestamp: datetime) -> Dict[str, Any]:
        """
        (EXPANDED V2 - MULTI-SCENARIO EXPERT)
        Analyzes counter-intuitive behaviors by checking for several known physical
        limitations, flawed strategies, or trade-offs.

        The checks run on the rows of ``self.data`` within +/- 30 minutes of
        ``timestamp``. Which scenario is evaluated depends on ``primary_node``:

        * ``'H_trans'`` / ``'Q_trans'``: transpiration falling while light is
          high and humidity is high.
        * ``'uC'`` / ``'C_inj'`` / ``'B'``: CO2 being injected while light is low.
        * ``'T'`` / ``'H'``: vents open while the outside reading is at least as
          high as the inside one.

        Args:
            primary_node: KG node name the question is about.
            timestamp: Centre of the one-hour analysis window.

        Returns:
            A dict with ``conclusion``, ``explanation`` and ``evidence`` when a
            scenario matches; otherwise ``{'error': <message>}``. A scenario whose
            required columns are unmapped or absent from the data is skipped
            rather than raising ``KeyError``.
        """
        print(f"DEBUG (Causal Anomaly Expert V2): Analyzing {primary_node} around {timestamp}.")

        try:
            window_start = timestamp - timedelta(minutes=30)
            window_end = timestamp + timedelta(minutes=30)
            window_data = self.data.loc[window_start:window_end]
            if window_data.empty:
                return {'error': "No data for this time."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        def column_series(node: str):
            """Return the window's series for ``node``'s mapped column, or None if unavailable."""
            col = self.node_to_column.get(node)
            if col is None or col not in window_data.columns:
                print(f"DEBUG (Causal Anomaly Expert V2): No data column available for node '{node}'.")
                return None
            return window_data[col]

        # ==============================================================================
        # --- SCENARIO 1: Transpiration limited by high humidity ---
        # Query: "Why did transpiration drop even though it was sunny?"
        # ==============================================================================
        if primary_node in ('H_trans', 'Q_trans'):
            trans_series = column_series(primary_node)
            light_series = column_series('Qrad')
            hum_series = column_series('H_ref')

            if trans_series is not None and light_series is not None and hum_series is not None:
                # Trend is the net change between the first and last sample of the window.
                trans_trend = trans_series.iloc[-1] - trans_series.iloc[0]
                avg_light = light_series.mean()
                avg_humidity = hum_series.mean()

                is_transpiration_dropping = trans_trend < 0
                is_light_still_high = avg_light > 300
                is_humidity_saturated = avg_humidity > 80

                print(f"DEBUG: trans_trend={trans_trend}, avg_light={avg_light}, avg_humidity={avg_humidity}")
                print(f"DEBUG: is_transpiration_dropping={is_transpiration_dropping}, is_light_still_high={is_light_still_high}, is_humidity_saturated={is_humidity_saturated}")

                if is_transpiration_dropping and is_light_still_high and is_humidity_saturated:
                    return {
                        'conclusion': "No, this is not a sign of plant stress. It's a sign of the controller's success.",
                        'explanation': (
                            "The drop in transpiration is a natural physical response to the high-humidity environment. As the air becomes saturated, it's physically harder for plants to release more moisture. This reduction in 'vapor pressure deficit' is the direct cause."
                        ),
                        'evidence': {
                            "Controller's Strategy": f"The controller was successfully maintaining a high humidity setpoint of around {avg_humidity:.1f}%.",
                            "The Physical Consequence": "This reduced the 'drying power' of the air, slowing transpiration even with ample sun.",
                            "Plant State": "The plants are not stressed; they simply don't need to 'sweat' (transpire) as much in this comfortable environment."
                        }
                    }

        # ==============================================================================
        # --- SCENARIO 2: Ineffective CO₂ Injection due to low light ---
        # Query: "Why is CO₂ injection on when it's cloudy?" or "Why isn't biomass increasing?"
        # ==============================================================================
        if primary_node in ('uC', 'C_inj', 'B'):
            co2_series = column_series('uC')
            light_series = column_series('Qrad')

            if co2_series is not None and light_series is not None:
                avg_co2_inj = co2_series.mean()
                avg_light = light_series.mean()

                is_injecting = avg_co2_inj > 0.1  # Is the system actively injecting?
                is_dark = avg_light < 100         # Is there insufficient light for photosynthesis?

                if is_injecting and is_dark:
                    return {
                        'conclusion': "This is a sign of a flawed or lagging control strategy.",
                        'explanation': (
                            "The controller was injecting expensive CO₂ at a time when there was not enough sunlight for the plants to use it for photosynthesis. This action is ineffective and wastes resources."
                        ),
                        'evidence': {
                            "Controller's Action": f"The system was actively injecting CO₂ (average rate: {avg_co2_inj:.2f}).",
                            "Contradictory Condition": f"However, solar radiation was too low for effective photosynthesis (average: {avg_light:.1f} W/m²).",
                            "Diagnosis": "This indicates a lag in the controller's reaction to the sudden change in weather (e.g., cloud cover) or a potential flaw in its predictive model."
                        }
                    }

        # ==============================================================================
        # --- SCENARIO 3: Ineffective Ventilation due to outside conditions ---
        # Query: "Why is the temperature/humidity still high even though the vents are open?"
        # ==============================================================================
        if primary_node in ('T', 'H'):
            vent_series = column_series('uV')
            avg_vent = vent_series.mean() if vent_series is not None else None
            is_venting = avg_vent is not None and avg_vent > 0.01  # Are the vents significantly open?

            if is_venting and primary_node == 'T':
                # Check for ineffective cooling
                inside_series = column_series('T')
                outside_series = column_series('Tout')
                if inside_series is not None and outside_series is not None:
                    t_in = inside_series.mean()
                    t_out = outside_series.mean()
                    if t_out >= t_in:
                        return {
                            'conclusion': "The ventilation was ineffective for cooling because of unfavorable outside conditions.",
                            'explanation': (
                                "The controller was attempting to cool the greenhouse using ventilation, but this action was counter-productive. The outside air was warmer than the inside air, so opening the vents actually introduced more heat into the system."
                            ),
                            'evidence': {
                                "Controller's Action": f"The ventilation system was active (average opening: {avg_vent:.3f}).",
                                "Counter-Productive Condition": f"The outside air was hotter ({t_out:.1f}°C) than the inside air ({t_in:.1f}°C).",
                                "Diagnosis": "This shows the physical limitation of ventilation. To cool further, the system would need to switch to a more energy-intensive method like active cooling."
                            }
                        }

            if is_venting and primary_node == 'H':
                # Check for ineffective dehumidification
                inside_series = column_series('H')
                outside_series = column_series('Hout')
                if inside_series is not None and outside_series is not None:
                    h_in = inside_series.mean()
                    h_out = outside_series.mean()
                    if h_out >= h_in:
                        return {
                            'conclusion': "The ventilation was ineffective for dehumidification because of unfavorable outside conditions.",
                            'explanation': (
                                "The controller was attempting to reduce humidity using ventilation, but this was ineffective. The outside air was more humid than the inside air, so opening the vents actually introduced more moisture."
                            ),
                            'evidence': {
                                "Controller's Action": f"The ventilation system was active (average opening: {avg_vent:.3f}).",
                                "Counter-Productive Condition": f"The outside air was more humid ({h_out:.1f}%) than the inside air ({h_in:.1f}%).",
                                "Diagnosis": "This highlights a physical limitation. To dehumidify under these conditions, the system would need to use a different method, such as the condensation effect from the active cooling system."
                            }
                        }

        # --- FALLBACK ---
        return {'error': "Could not determine the specific cause for this counter-intuitive behavior."}

    def analyze_model_discrepancy(self, control_node: str, state_node: str, start_dt: datetime, end_dt: datetime, query: str = "") -> Dict[str, Any]:
        """
        (DEFINITIVE - V8)
        Diagnoses model mismatch by checking for two distinct signatures and using
        variable-specific thresholds for control activation. This is the definitive version.
        """
        from datetime import datetime
        print(f"DEBUG (Model Discrepancy Expert v8): Analyzing '{control_node}' vs '{state_node}'.")
        
        base_control_node = self.get_control_action_base_code(control_node)

        try:
            if end_dt.time() < start_dt.time():
                end_of_first_day = datetime.combine(start_dt.date(), time(23, 59, 59))
                part1 = self.data.loc[start_dt:end_of_first_day]
                next_day_date = start_dt.date() + timedelta(days=1)
                start_of_next_day = datetime.combine(next_day_date, time(0, 0, 0))
                end_dt_next_day = datetime.combine(next_day_date, end_dt.time())
                part2 = self.data.loc[start_of_next_day:end_dt_next_day]
                window_data = pd.concat([part1, part2]).copy()
                print(f"DEBUG (Model Discrepancy): Handled overnight range. Total data points: {len(window_data)}")
            else:
                window_data = self.data.loc[start_dt:end_dt].copy()

            if window_data.empty: return {'error': "No data for the specified time range."}
        except Exception as e: return {'error': f"Error slicing data: {e}"}

        control_col = self.node_to_column.get(control_node) 
        state_col = self.node_to_column.get(state_node)
        lg_node = self.lagrangian_nodes.get(state_node, {}).get('ieq')
        lg_col = self.node_to_column.get(lg_node) if lg_node else None
        qrad_col = self.node_to_column.get('Qrad')

        required_cols = [control_col, state_col, lg_col, qrad_col]
        if not all(c and c in window_data.columns for c in required_cols):
            return {'finding': "Inconclusive", 'explanation': "Could not perform analysis due to missing data.", 'evidence': {}}

        qrad_volatility = window_data[qrad_col].std()
        is_driver_volatile = qrad_volatility > 100
        control_mean_day = window_data[control_col].mean()
        control_cv_day = window_data[control_col].std() / (abs(control_mean_day) + 1e-9)
        is_control_jagged = control_cv_day > 0.5
        state_cv_day = window_data[state_col].std() / (abs(window_data[state_col].mean()) + 1e-9)
        is_state_unstable = state_cv_day > 0.02

        if is_driver_volatile and is_control_jagged and is_state_unstable:
            avg_lg = window_data[lg_col].mean()
            return {
                'finding': "Yes, there is evidence the model was underestimating the dynamic heat load.",
                'explanation': "The jagged control signal suggests the model could not fully capture the rapid, chaotic heat load from the volatile sun. The controller was forced to constantly react rather than smoothly anticipate.",
                'evidence': {
                    "Root Cause": f"Highly volatile solar radiation (Std Dev: {qrad_volatility:.2f} W/m²).",
                    "Controller's Struggle": f"The control signal was extremely jagged (CV: {control_cv_day:.2f}).",
                    "Resulting Instability": f"The state reference itself became unstable (CV: {state_cv_day:.2f})."
                }
            }

        # --- SIGNATURE 2: Steady-State Underestimation (Nighttime Scenario) ---
        expected_effect_direction = self.get_effect_direction(base_control_node, state_node)
        avg_control = window_data[control_col].mean()
        actual_state_change = window_data[state_col].iloc[-1] - window_data[state_col].iloc[0]
        avg_lg = window_data[lg_col].mean()

        # --- <<< THE DEFINITIVE FIX IS HERE >>> ---
        # 1. Get the dictionary of thresholds.
        thresholds = self.config.get('control_active_threshold', {})
        # 2. Get the specific threshold for this control, or fall back to the default.
        specific_threshold = thresholds.get(base_control_node, thresholds.get('default', 1E-6))
        # 3. Perform the check with the correct threshold.
        is_control_acting = abs(avg_control) > specific_threshold
        print(f"DEBUG (Model Discrepancy): Checking if control is active. Avg: {avg_control:.2E}, Threshold: {specific_threshold:.1E}")
        # --- <<< END OF FIX >>> ---

        moved_opposite = False
        if expected_effect_direction == 'increase' and actual_state_change < -self.constraint_tolerance.get(state_node, 0.1):
            moved_opposite = True
        elif expected_effect_direction == 'decrease' and actual_state_change > self.constraint_tolerance.get(state_node, 0.1):
            moved_opposite = True

        is_optimizer_fighting = False
        lg_thresholds_dict = self.config.get('lagrangian_active_threshold', {})
        lg_specific_threshold = lg_thresholds_dict.get(lg_node, lg_thresholds_dict.get('default', 1e-7))
        if expected_effect_direction == 'increase' and avg_lg < -lg_specific_threshold:
            is_optimizer_fighting = True
        elif expected_effect_direction == 'decrease' and avg_lg > lg_specific_threshold:
            is_optimizer_fighting = True

        if is_control_acting and moved_opposite and is_optimizer_fighting:
            # Customize conclusion based on the control type and query context
            query_lower = query.lower()
            if base_control_node == 'uQh' and 'heat loss' in query_lower:
                conclusion = "The model underestimated the heat loss rate during the night. The greenhouse was losing heat faster than the model predicted, requiring continuous heating to maintain temperature."
            else:
                conclusion = "This mismatch likely occurred due to higher-than-expected external influences (like wind) which were not adequately represented in the model."
            
            return {
                'finding': "Yes, there is strong evidence the model was inaccurate.",
                'explanation': f"The discrepancy arises because the controller underestimated the forces affecting {self.node_to_name.get(state_node)}. Despite applying {self.node_to_name.get(base_control_node)}, the actual state moved in the opposite direction, forcing the optimizer to continuously fight the constraint.",
                'evidence': {
                    "The Controller's Plan": f"The system applied a consistent {self.node_to_name.get(base_control_node)} with an average value of {avg_control:.2E}.",
                    "The Physical Reality": f"Contrary to the plan, the actual {self.node_to_name.get(state_node)} moved in the opposite direction by {actual_state_change:.2f}.",
                    "The 'Effort' Signal (Lagrangian)": f"The Lagrangian (`{lg_col}`) was significantly active with an average of {avg_lg:.2E}, proving the controller knew it was losing the battle."
                },
                "conclusion": conclusion
            }
        
        # --- SIGNATURE 3 (NEW): High-Effort, Low-Result Mismatch ---
        # This signature is perfect for the "underestimated heat loss" scenario.
        teq_lg_col = self.node_to_column.get('T_eq') # The EQUALITY Lagrangian is the key
        state_ref_col = self.node_to_column.get(state_node) # e.g., Temp_ref

        if teq_lg_col and state_ref_col and all(c in window_data.columns for c in [teq_lg_col, state_ref_col]):
            # Evidence 1: Is the optimizer working extremely hard and erratically?
            # A high standard deviation in Teq_lg means the model is constantly being "surprised".
            teq_lg_series = window_data[teq_lg_col]
            teq_lg_std = teq_lg_series.std()
            teq_lg_mean_abs = teq_lg_series.abs().mean()
            
            # Calculate Coefficient of Variation (CV) to normalize the volatility.
            # Add a small epsilon to prevent division by zero if the mean is 0.
            teq_lg_cv = teq_lg_std / (teq_lg_mean_abs + 1e-9)
            
            # A CV > 0.5 is a robust, general indicator of high relative volatility.
            is_optimizer_struggling = teq_lg_cv > 0.5
            # --- END OF GENERALIZED FIX ---

            state_change = window_data[state_ref_col].iloc[-1] - window_data[state_ref_col].iloc[0]
            # This threshold is already relative to the variable, so it's fine.
            is_result_poor = abs(state_change) < 0.5

            print(f"DEBUG (Model Discrepancy V10): Inefficiency Check - Optimizer Struggling? {is_optimizer_struggling} (CV: {teq_lg_cv:.2f}, threshold: >0.5), Poor Result? {is_result_poor} (change: {state_change:.2f})")

            if is_optimizer_struggling and is_result_poor:
                # We found the mismatch!
                return {
                    'finding': "Yes, there is strong evidence the model was underestimating the nighttime heat loss.",
                    'explanation': (
                        "The proof of the mismatch is in the `Teq_lg` signal, which represents the error between the model's prediction and reality. "
                        "Its high volatility shows the model was constantly being surprised by how fast the temperature was dropping, forcing it to make large, reactive adjustments with the heating system. "
                        "This 'high effort, low result' pattern is a classic sign that the physical heat loss, likely from wind or poor insulation, was greater than the model predicted."
                    ),
                    'evidence': {
                        "Optimizer's Struggle": f"The Temperature Equality Lagrangian (`Teq_lg`) was highly volatile (Std Dev: {teq_lg_volatility:.4f}), which is the definitive proof that the controller's internal model was inaccurate.",
                        "Inefficient Action": "Despite the controller applying a consistently high level of heat...",
                        "Poor Physical Result": f"...the actual temperature barely changed over the period, increasing by only {state_change:.2f}°C, indicating a significant, unmodeled heat loss."
                    }
                }

        # Fallback if no signature is found
        return {
            'finding': "No, there is no clear evidence of a model mismatch in this period.",
            'explanation': "The controller's actions appear to be consistent with the physical outcomes observed.",
            'evidence': {}
        }

    def _format_discrepancy_result(self, finding, explanation, control_node, state_node, lg_col, control_action_value, state_change_value, lg_value):
        """
        Build the result dictionary for a model-discrepancy finding.

        Args:
            finding: Headline text, returned unchanged under 'finding'.
            explanation: Narrative text, returned unchanged under 'explanation'.
            control_node: Control identifier; looked up in ``self.node_to_name``.
            state_node: State identifier; looked up in ``self.node_to_name``.
            lg_col: Lagrangian column label quoted in the evidence text.
            control_action_value: Numeric control value quoted in the evidence.
            state_change_value: Numeric state change, shown with two decimals.
            lg_value: Numeric Lagrangian value, shown in scientific notation.

        Returns:
            A dict with the keys 'finding', 'explanation' and 'evidence'.
        """
        # A tiny non-zero magnitude would print as a misleading 0.0000 with
        # fixed-point formatting, so switch to scientific notation for it.
        if 0 < abs(control_action_value) < 1e-3:
            control_display_value = f"{control_action_value:.2E}"
        else:
            control_display_value = f"{control_action_value:.4f}"

        # Fall back to the raw node identifier when no friendly name is
        # registered, so the text never contains the literal "None".
        control_name = self.node_to_name.get(control_node, control_node)
        state_name = self.node_to_name.get(state_node, state_node)

        return {
            'finding': finding,
            'explanation': explanation,
            'evidence': {
                "Controller's Plan": f"The system applied a consistent action with `{control_name}` (average value: {control_display_value}).",
                "Physical Reality": f"Despite this action, the actual `{state_name}` moved in the opposite direction by {state_change_value:.2f}.",
                "Optimizer's 'Struggle'": f"The Lagrangian (`{lg_col}`) was strongly active (avg: {lg_value:.2E}), proving the system knew it was losing the battle and the state was moving away from the desired setpoint."
            }
        }
    
    def _find_potential_control_drivers(self, target_node: str, max_depth: int = 4) -> List[str]:
        """
        Find control nodes upstream of ``target_node`` in the knowledge graph.

        Performs a breadth-first walk over predecessor edges. A predecessor of
        type 'Control' is recorded and not expanded further; predecessors of
        type 'Flux', 'Process', 'Reference' or 'State' are expanded; any other
        type stops that path.

        Args:
            target_node: Node to start the backward search from.
            max_depth: Nodes dequeued at this depth or deeper are not expanded.

        Returns:
            Control nodes in discovery order (nearest first), without
            duplicates. Empty if ``target_node`` is not in the graph.
        """
        graph = self.kg.G
        if not graph.has_node(target_node):
            return []

        traversable_types = ('Flux', 'Process', 'Reference', 'State')
        # A list keeps discovery order; `visited` already guarantees each node
        # is recorded at most once, so the result is stable between runs.
        control_drivers: List[str] = []
        queue = deque([(target_node, 0)])  # (node, depth from target)
        visited = {target_node}

        while queue:
            current_node, depth = queue.popleft()
            if depth >= max_depth:
                continue

            for predecessor in graph.predecessors(current_node):
                if predecessor in visited:
                    continue
                visited.add(predecessor)

                node_type = graph.nodes[predecessor].get('type')
                if node_type == 'Control':
                    # Found a control; do not look further back along this path.
                    control_drivers.append(predecessor)
                elif node_type in traversable_types:
                    queue.append((predecessor, depth + 1))

        return control_drivers

    def extract_kg_context(self, target_node: str, depth: int = 2) -> Dict[str, Any]:
        """
        Collect knowledge-graph context around a node.

        The node is used as-is when present in the graph. Otherwise, if it is
        listed in ``self.state_variables``, its ``"<node>_ref"`` counterpart is
        used when that exists. If neither resolves, an error dict is returned.

        Args:
            target_node: Node identifier to look up.
            depth: When greater than 1, each direct neighbour is described
                recursively with ``depth - 1``.

        Returns:
            A dict with 'node', 'node_attributes', 'incoming_edges',
            'outgoing_edges' and 'related_nodes', or ``{"error": ...}``.
        """
        graph = self.kg.G

        # Resolve the requested identifier to an actual graph node.
        if target_node in graph:
            kg_node = target_node
        elif target_node in self.state_variables:
            reference_name = f"{target_node}_ref"
            kg_node = reference_name if reference_name in graph else None
        else:
            kg_node = None

        if not kg_node:
            return {"error": f"Node {target_node} not found in knowledge graph"}

        def describe_edge(endpoint_key, endpoint, attrs):
            # Same four fields for both directions; only the endpoint key differs.
            return {
                endpoint_key: endpoint,
                'relationship': attrs.get('relationship', ''),
                'equation': attrs.get('equation', ''),
                'description': attrs.get('description', '')
            }

        node_attributes = dict(graph.nodes()[kg_node])
        incoming = [
            describe_edge('source', origin, attrs)
            for origin, _, attrs in graph.in_edges(kg_node, data=True)
        ]
        outgoing = [
            describe_edge('target', destination, attrs)
            for _, destination, attrs in graph.out_edges(kg_node, data=True)
        ]

        related = {}  # neighbour -> its own context, filled only when depth > 1
        context = {
            'node': kg_node,
            'node_attributes': node_attributes,
            'incoming_edges': incoming,
            'outgoing_edges': outgoing,
            'related_nodes': related
        }

        if depth > 1:
            # Sources first, then targets, mirroring the edge lists above.
            neighbours = [edge['source'] for edge in incoming]
            neighbours += [edge['target'] for edge in outgoing]
            for neighbour in neighbours:
                neighbour_context = self.extract_kg_context(neighbour, depth - 1)
                if 'error' not in neighbour_context:
                    related[neighbour] = neighbour_context

        return context


    def analyze_control_saturation(self, lagrangian_node: str, date: date) -> Dict[str, Any]:
        """
        Judge whether a saturated control Lagrangian reflects a deliberate,
        aggressive strategy rather than a failure.

        The check looks at one calendar day of ``self.data`` and tests three
        conditions inside the window where the associated control exceeds 0.1:
        the Lagrangian exceeds 0.1 for more than 80% of the window, the mean of
        the 'B_ieq' column is below -1e-7, and the day's summed 'Qrad' column
        exceeds 50000.

        Args:
            lagrangian_node: Lagrangian identifier; mapped back to its control
                through ``self.lagrangian_nodes``.
            date: Calendar day to analyse, compared against the index dates.

        Returns:
            ``{'conclusion': str, 'evidence': dict}`` on success, or
            ``{'error': str}`` when the day has no data, the Lagrangian cannot
            be mapped to a control, or a required data column is unavailable.
        """
        print(f"DEBUG (Control Saturation Expert): Evaluating {lagrangian_node} for {date}.")

        try:
            # self.data already spans the needed range; keep only the requested day.
            day_data = self.data[self.data.index.date == date].copy()
            if day_data.empty:
                return {'error': "No data for this day."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        # --- 1. Identify the associated control and gather data ---
        control_node = next(
            (key for key, val in self.lagrangian_nodes.items() if lagrangian_node in val.values()),
            None,
        )
        if not control_node:
            return {'error': f"Could not map Lagrangian {lagrangian_node} to a control action."}

        lg_col = self.node_to_column.get(lagrangian_node)
        control_col = self.node_to_column.get(control_node)
        bieq_lg_col = self.node_to_column.get('B_ieq')
        qrad_col = self.node_to_column.get('Qrad')

        # Fail with an error dict (like the other failure paths) instead of an
        # uncaught KeyError when a node has no column or the column is absent.
        required_columns = {
            lagrangian_node: lg_col,
            control_node: control_col,
            'B_ieq': bieq_lg_col,
            'Qrad': qrad_col,
        }
        unavailable = [
            str(node) for node, col in required_columns.items()
            if col is None or col not in day_data.columns
        ]
        if unavailable:
            return {'error': f"Missing data column(s) for: {', '.join(unavailable)}."}

        # Window in which the control is considered active.
        injection_window = day_data[day_data[control_col] > 0.1]
        if injection_window.empty:
            return {'conclusion': "The CO2 injection system was not active, so the Lagrangian saturation is likely an anomaly or indicates a different issue.", 'evidence': {}}

        # --- 2. Check for the "Aggressive Growth" signature ---

        # Condition A: share of the active window with a saturated Lagrangian.
        lg_series_in_window = injection_window[lg_col]
        saturated_points = lg_series_in_window[lg_series_in_window > 0.1]
        saturation_percentage = len(saturated_points) / len(lg_series_in_window)
        is_saturated = saturation_percentage > 0.8

        # Condition B: a clearly negative mean 'B_ieq' Lagrangian in the window.
        avg_bieq_lg = injection_window[bieq_lg_col].mean()
        is_profit_driven = avg_bieq_lg < -1e-7

        # Condition C: total light over the whole day (a sum, not a mean), so a
        # volatile day can still qualify. 50000 is a tunable threshold.
        total_light_integral = day_data[qrad_col].sum()
        is_day_good_for_growth = total_light_integral > 50000

        if is_saturated and is_profit_driven and is_day_good_for_growth:
            return {
                'conclusion': "This is the signature of a deliberate and extremely aggressive growth strategy, not a failure.",
                'evidence': {
                    "Optimizer's Demand Signal": f"The saturated '{lagrangian_node}' is the definitive proof. For {saturation_percentage:.0%} of the time CO₂ was being injected, the optimizer was signaling that it wanted to inject even more but was constrained by the hardware's maximum capacity.",
                    "Overarching Goal": f"This aggressive demand was triggered by the controller's primary goal to maximize profit, proven by the consistently negative Biomass Lagrangian (average: {avg_bieq_lg:.2E}) during the growth period.",
                    "Favorable Conditions": f"This 'all-in' strategy was viable because it was a high-light day overall (Total Light Integral: {total_light_integral:.0f}), providing ample energy for the plants to use the supplemental CO₂."
                }
            }

        # --- Fallback if the specific signature is not found ---
        return {
            'conclusion': "The sustained high value on the Lagrangian is unusual, but the full signature for an aggressive growth strategy was not met.",
            'evidence': {
                "Saturation": f"The Lagrangian was saturated {saturation_percentage:.0%} of the injection time.",
                "Profit Goal": f"The profit goal was {'active' if is_profit_driven else 'not active'}.",
                "Good Growth Day": f"The day was rated as {'good' if is_day_good_for_growth else 'not good'} for growth based on total sunlight."
            }
        }

    def _get_related_state_variables(self, control_node: str) -> List[str]:
        """
        List the base state variables directly connected to a control node.

        Both directions are considered: targets of the control's outgoing
        edges and sources of its incoming edges. Only nodes contained in
        ``self.base_state_variables`` are kept.

        Args:
            control_node: Control node identifier in the knowledge graph.

        Returns:
            Unique state variables, outgoing-edge targets first and then
            incoming-edge sources, each in edge iteration order. Empty when
            the control node is not in the graph.
        """
        graph = self.kg.G
        if control_node not in graph:
            return []

        base_states = self.base_state_variables
        influenced = [
            target for _, target, _ in graph.out_edges(control_node, data=True)
            if target in base_states
        ]
        influencing = [
            source for source, _, _ in graph.in_edges(control_node, data=True)
            if source in base_states
        ]

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result no longer depends on set hashing.
        return list(dict.fromkeys(influenced + influencing))


    def analyze_context_over_time_range(self, variable_code: str, query_term: str, date_str: str, start_time_str: str, end_time_str: str) -> Dict[str, Any]:
        """
        Summarise one variable over a time range within a single date.

        The data column is resolved from ``query_term`` via
        ``self.get_variable_info``; ``variable_code`` is only echoed in the
        debug output and in the returned context.

        Args:
            variable_code: Variable code reported back in the result.
            query_term: Term used to resolve the data column.
            date_str: Date, combined with each time string as
                ``"{date_str} {time}"`` and parsed by ``pd.Timestamp``.
            start_time_str: Start of the range (inclusive label slice).
            end_time_str: End of the range (inclusive label slice).

        Returns:
            ``{'time_range_analysis': {...}}`` holding the resolved column,
            the parsed bounds and summary statistics, or ``{"error": str}``
            when the data is empty, the column cannot be resolved, the slice
            fails or is empty, or the column has no valid values in the range.
        """
        print(f"DEBUG - analyze_range: Analyzing '{query_term}' ({variable_code}) from {start_time_str} to {end_time_str} on {date_str}")

        if not hasattr(self, 'data') or self.data.empty:
            return {"error": "Dataframe is empty. Cannot analyze context."}

        # --- Map the query term to the data column name ---
        _, target_column, _ = self.get_variable_info(query_term)
        if not target_column or target_column not in self.data.columns:
            return {"error": f"Could not find data column '{target_column}' for variable '{query_term}'."}

        # --- Slice the DataFrame for the given time range ---
        try:
            start_dt = pd.Timestamp(f"{date_str} {start_time_str}")
            end_dt = pd.Timestamp(f"{date_str} {end_time_str}")

            # Label-based slice on the index; both bounds are inclusive.
            range_data = self.data.loc[start_dt:end_dt]

            if range_data.empty:
                return {"error": f"No data available in the range from {start_time_str} to {end_time_str} on {date_str}."}

        except Exception as e:
            return {"error": f"Failed to slice data for the given time range: {e}"}

        # --- Calculate statistics for the target variable ---
        target_series = range_data[target_column]

        # idxmin/idxmax raise (or are deprecated, depending on the pandas
        # version) on an all-NA series, so report that case explicitly.
        if not target_series.notna().any():
            return {"error": f"No valid values for '{query_term}' in the range from {start_time_str} to {end_time_str} on {date_str}."}

        start_value = target_series.iloc[0]
        end_value = target_series.iloc[-1]
        change = end_value - start_value
        # Avoid dividing by zero; a zero start reports 0% change.
        change_percent = (change / start_value) * 100 if start_value != 0 else 0

        summary_stats = {
            "start_value": start_value,
            "end_value": end_value,
            "change": change,
            "change_percent": change_percent,
            "mean_value": target_series.mean(),
            "min_value": target_series.min(),
            "max_value": target_series.max(),
            "min_time": target_series.idxmin(),
            "max_time": target_series.idxmax()
        }

        # --- Compile the final context dictionary ---
        context = {
            'time_range_analysis': {
                'target_variable_query': query_term,
                'variable_code': variable_code,
                'column_name': target_column,
                'start_time': start_dt,
                'end_time': end_dt,
                'summary_stats': summary_stats
            }
        }

        print(f"DEBUG - analyze_range: END. Returning context dictionary: {context}")
        return context

    def analyze_context_at_timestamp(self, variable_node: str, query_term: str, date_str: str, time_str: str) -> Dict[str, Any]:
        """
        Assemble a structured context snapshot for one variable at the data
        row nearest to a requested timestamp.

        The snapshot combines the variable's value, its knowledge-graph
        neighbours with their values in that row, incoming flux values (for
        base state variables), local correlations, a summary of pre-computed
        PCMCI links, constraint checks, and interpretations of the T/C/H
        inequality Lagrangians. The assembled dict is passed through
        ``self._filter_context_by_ablation`` before being returned.

        Args:
            variable_node: Identifier resolved first via
                ``self.get_variable_info``.
            query_term: Original query term; used as the fallback for
                resolution and echoed in the result.
            date_str: Date, combined with ``time_str`` as
                ``"{date_str} {time_str}"`` and parsed by ``pd.Timestamp``.
            time_str: Time of day for the lookup.

        Returns:
            The (ablation-filtered) context dict with the keys
            'timestamp_data', 'flux_values', 'kg_relationships',
            'correlations', 'causal_effects', 'constraints' and
            'optimizer_interpretation', or ``{"error": str}`` when the data is
            empty, the timestamp lookup fails, or the variable cannot be
            resolved to a column with a value.
        """
        print(f"DEBUG - analyze_context: Analyzing context for node '{variable_node}' ({query_term}) at {date_str} {time_str}")

        if not hasattr(self, 'data') or self.data.empty:
            return {"error": "Dataframe is empty. Cannot analyze context."}

        # --- Data and timestamp lookup ---
        try:
            target_timestamp = pd.Timestamp(f"{date_str} {time_str}")

            # Match the timestamp's tz-awareness to the index so they compare.
            if getattr(self.data.index, 'tz', None) is not None:
                # NOTE(review): the requested wall-clock time is interpreted
                # as UTC, not in the index's own zone - confirm this is intended.
                target_timestamp = target_timestamp.tz_localize('UTC')
            else:
                target_timestamp = pd.to_datetime(target_timestamp).tz_localize(None)

            if not isinstance(self.data.index, pd.DatetimeIndex):
                print(f"WARNING: DataFrame index is not DatetimeIndex, it's {type(self.data.index)}")
                return {"error": f"Data index is not datetime-compatible: {type(self.data.index)}"}

            # get_indexer returns the integer position of the nearest label.
            nearest_integer_index = self.data.index.get_indexer([target_timestamp], method='nearest')[0]

            if nearest_integer_index < 0 or nearest_integer_index >= len(self.data.index):
                return {"error": f"Could not find valid data index for timestamp {target_timestamp}"}

            actual_data_timestamp = self.data.index[nearest_integer_index]
            current_data_series = self.data.iloc[nearest_integer_index]

            # Only warn when the nearest row is far away; the lookup proceeds.
            time_difference = abs(actual_data_timestamp - target_timestamp)
            if time_difference > pd.Timedelta(minutes=15):
                print(f"WARN: No data found within 15 minutes of the requested time {target_timestamp}.")

        except Exception as e:
            return {"error": f"Could not retrieve data series for timestamp {date_str} {time_str}: {e}"}

        # --- Resolve the variable to its KG node, data column and display name ---
        kg_node, data_column_name, friendly_name = self.get_variable_info(variable_node)

        if not kg_node or not data_column_name:
            print(f"WARN - Could not resolve '{variable_node}' via get_variable_info. Falling back to query_term '{query_term}'.")
            kg_node, data_column_name, friendly_name = self.get_variable_info(query_term)
            if not kg_node or not data_column_name:
                return {"error": f"Could not resolve '{query_term}' or '{variable_node}' to a valid variable with data."}

        value_at_timestamp = current_data_series.get(data_column_name)
        if value_at_timestamp is None:
            return {"error": f"Data column '{data_column_name}' has no value at {actual_data_timestamp}."}

        variable_value_numeric = float(value_at_timestamp) if pd.notna(value_at_timestamp) else None

        # --- 1. KG relationships, with each neighbour's value in this row ---
        kg_relationships = {'incoming_edges': [], 'outgoing_edges': []}
        if self.kg.G.has_node(kg_node):
            for source, _, edge_attrs in self.kg.G.in_edges(kg_node, data=True):
                source_col = self.node_to_column.get(source)
                kg_relationships['incoming_edges'].append({
                    'source_name': self.node_to_name.get(source, source),
                    'description': edge_attrs.get('description', ''),
                    'value_at_timestamp': current_data_series.get(source_col) if source_col else 'N/A'
                })
            for _, target, edge_attrs in self.kg.G.out_edges(kg_node, data=True):
                target_col = self.node_to_column.get(target)
                kg_relationships['outgoing_edges'].append({
                    'target_name': self.node_to_name.get(target, target),
                    'description': edge_attrs.get('description', ''),
                    'value_at_timestamp': current_data_series.get(target_col) if target_col else 'N/A'
                })

        # --- 2. Associated fluxes (base state variables only) ---
        flux_values = {}
        if kg_node in self.base_state_variables:
            for source_node, _, _ in self.kg.G.in_edges(kg_node, data=True):
                source_column = self.node_to_column.get(source_node)
                # Heuristic: flux columns are recognised by 'phi_' in the name.
                if source_column and 'phi_' in source_column.lower():
                    flux_value = current_data_series.get(source_column)
                    if pd.notna(flux_value):
                        flux_values[source_node] = {
                            'name': self.node_to_name.get(source_node, source_node),
                            'value': flux_value
                        }

        # --- 3. Local correlations ---
        correlations_result = self.analyze_correlations_around_timestamp(data_column_name, actual_data_timestamp)

        # --- 4. Causal links from PCMCI ---
        causal_effects = {}
        pcmci_results = self.pcmci_results
        if not (pcmci_results and 'graph' in pcmci_results):
            # Previously unreachable: this check sat in the else-branch of an
            # `if` that had already established the results exist.
            causal_effects['pcmci'] = "PCMCI results not precomputed or available."
        else:
            # .get() so a result set without 'var_names' reports "not found"
            # instead of raising KeyError outside the try block below.
            var_names = pcmci_results.get('var_names')
            if not (kg_node and var_names and kg_node in var_names):
                causal_effects['pcmci'] = f"The node '{kg_node}' was not found in the pre-computed PCMCI analysis."
            else:
                try:
                    target_idx = var_names.index(kg_node)
                    graph = pcmci_results['graph']
                    val_matrix = pcmci_results['val_matrix']
                    tau_max = pcmci_results['tau_max']

                    significant_causes = []
                    for source_idx, source_node in enumerate(var_names):
                        if source_idx == target_idx:
                            continue

                        # Strongest '-->' link from this source across lags 1..tau_max.
                        strongest_link = {'lag': 0, 'strength': 0}
                        for lag in range(1, tau_max + 1):
                            if lag < len(graph) and '-->' in str(graph[lag][target_idx, source_idx]):
                                strength = val_matrix[lag][target_idx, source_idx]
                                if abs(strength) > abs(strongest_link['strength']):
                                    strongest_link = {'lag': lag, 'strength': strength}

                        if strongest_link['strength'] != 0:
                            significant_causes.append({
                                'source': source_node,
                                # assumes one lag step is 5 minutes - TODO confirm
                                'lag_minutes': strongest_link['lag'] * 5,
                                'strength': strongest_link['strength']
                            })

                    if significant_causes:
                        # Strongest absolute link first; report at most four.
                        sorted_causes = sorted(significant_causes, key=lambda x: abs(x['strength']), reverse=True)
                        summary_lines = []
                        for cause in sorted_causes[:4]:
                            source_name = self.node_to_name.get(cause['source'], cause['source'])
                            summary_lines.append(
                                f"{source_name} (lag ~{cause['lag_minutes']} min, strength: {cause['strength']:.2f})"
                            )
                        causal_effects['pcmci'] = "The strongest historical causal drivers are: " + "; ".join(summary_lines)
                    else:
                        causal_effects['pcmci'] = f"No significant causal drivers for {friendly_name} were found in the historical data."
                except Exception as e:
                    error_msg = f"An unexpected error occurred during PCMCI summarization: {e}"
                    print(f"ERROR: {error_msg}")
                    traceback.print_exc()
                    causal_effects['pcmci'] = error_msg

        # --- 5. Optimizer state interpretation ---
        optimizer_interpretation = {}
        # Only the states checked here (T, C, H) are looked up for an 'ieq' Lagrangian.
        for state_node_check in ['T', 'C', 'H']:
            lg_node = self.lagrangian_nodes.get(state_node_check, {}).get('ieq')
            if lg_node:
                lg_col = self.node_to_column.get(lg_node)
                if lg_col in current_data_series:
                    lg_val = current_data_series[lg_col]
                    optimizer_interpretation[state_node_check] = self._interpret_lagrangian_state(state_node_check, lg_val)

        # --- Constraints ---
        print(f"Checking constraints for variable code '{kg_node}' with value {variable_value_numeric}")
        constraints = self.check_constraints_at_timestamp(
            variable_code=kg_node,           # canonical KG node (e.g., 'T')
            value=variable_value_numeric,    # single numeric value, or None if missing
            data_row=current_data_series     # full row for additional context
        )
        print(f"Constraint check results: {constraints}")

        # --- Compile structured context ---
        context = {
            'timestamp_data': {
                'timestamp': actual_data_timestamp,
                'target_variable_query': query_term,
                'kg_node': kg_node,
                'column_name': data_column_name,
                'value': value_at_timestamp
            },
            'flux_values': flux_values,
            'kg_relationships': kg_relationships,
            'correlations': correlations_result,
            'causal_effects': causal_effects,
            'constraints': constraints,
            'optimizer_interpretation': optimizer_interpretation
        }

        context = self._filter_context_by_ablation(context)

        print("DEBUG - analyze_context: END. Returning context dictionary.")
        return context

    def analyze_correlations_around_timestamp(self, target_variable_col: str, timestamp: pd.Timestamp, window_minutes: int = 60) -> Dict[str, Any]:
        """
        Rank the columns most correlated with a target column near a timestamp.

        A window spanning ``window_minutes // 2`` minutes on each side of
        ``timestamp`` is sliced from ``self.data``. Every other numeric column
        is correlated with the target through ``Series.corr``; the five
        strongest coefficients (by absolute value) are returned keyed by the
        display name resolved via ``self.column_to_node`` and
        ``self.node_to_name`` (falling back to the raw column name).

        Args:
            target_variable_col: Column of ``self.data`` to correlate against.
            timestamp: Centre of the analysis window.
            window_minutes: Total width of the window in minutes.

        Returns:
            ``{'summary', 'top_correlations'}`` when at least one coefficient
            was computed; ``{'summary'}`` when there is too little data or no
            coefficient could be computed; ``{'error'}`` when the window could
            not be built or the target column is missing.
        """
        print(f"DEBUG - Analyzing local correlations for '{target_variable_col}' around {timestamp}.")

        # Build and validate the window; any failure becomes an error payload.
        try:
            half_span = pd.Timedelta(minutes=window_minutes // 2)
            window_data = self.data.loc[timestamp - half_span:timestamp + half_span]

            if window_data.empty or len(window_data) < 2:
                return {'summary': "Not enough data available in the window for correlation analysis."}

            if target_variable_col not in window_data.columns:
                return {'error': f"Target variable column '{target_variable_col}' not found in data."}

            # The target itself needs at least two usable samples.
            if window_data[target_variable_col].notna().sum() < 2:
                return {'summary': f"Not enough valid data points for '{target_variable_col}' in the window."}

        except Exception as e:
            return {'error': f"Error during data windowing for correlation analysis: {e}"}

        # Correlate the target with every other numeric column.
        reference_series = window_data[target_variable_col]
        strengths = {}

        for peer_col in window_data.columns:
            if peer_col == target_variable_col:
                continue

            peer_series = window_data[peer_col]

            # dtype inspection is a scalar check, so it can never be ambiguous.
            if not pd.api.types.is_numeric_dtype(peer_series.dtype):
                continue

            try:
                coefficient = reference_series.corr(peer_series)
                # NaN coefficients (e.g. constant series) carry no information.
                if pd.notna(coefficient):
                    strengths[peer_col] = coefficient
            except Exception as e:
                print(f"WARN - Could not compute correlation between '{target_variable_col}' and '{peer_col}': {e}")

        if not strengths:
            return {'summary': "No significant correlations found with other variables in this time window."}

        # Strongest relationships first, regardless of sign; keep the top five.
        ranked = sorted(strengths.items(), key=lambda pair: abs(pair[1]), reverse=True)[:5]
        top_correlations = {
            self.node_to_name.get(self.column_to_node.get(col, col), col): coefficient
            for col, coefficient in ranked
        }

        return {
            'summary': f"Found {len(top_correlations)} notable local correlations.",
            'top_correlations': top_correlations
        }

    def check_constraints_at_timestamp(self, variable_code: str, value: Any, data_row: pd.Series) -> dict[str, Any]:
        """
        Evaluate one value against the hard and soft limits of a variable.

        The value is first reduced to a single finite-or-infinite float: the
        first element is taken from a Series/ndarray, anything else goes
        through ``float()``. Values that are missing, non-numeric or NaN after
        conversion are reported as unavailable instead of being compared.

        Limits come from ``self.constraint_limits[variable_code]`` and may hold
        ``'min'``, ``'max'`` (hard limits) and ``'optimal_range'`` (a 2-item
        list or tuple, soft limits).

        Args:
            variable_code: Key into ``self.constraint_limits``.
            value: Scalar, numeric string, Series or ndarray to check.
            data_row: Accepted for interface compatibility; not read here.

        Returns:
            Dict with ``'status'`` (str), ``'is_violated'`` (bool),
            ``'is_outside_optimal'`` (bool) and ``'details'`` (list of str).
        """
        constraints_info = {'status': 'No constraints defined', 'is_violated': False, 'is_outside_optimal': False, 'details': []}

        # Reduce the input to one float; conversion failures mean "not numeric".
        scalar_value = None
        try:
            if isinstance(value, (pd.Series, np.ndarray)):
                if len(value) > 0:
                    scalar_value = float(value.iloc[0] if isinstance(value, pd.Series) else value[0])
            elif pd.notna(value):
                scalar_value = float(value)
        except (ValueError, TypeError):
            scalar_value = None

        # A NaN can still get through (first element of a Series/array, or the
        # string "nan"). Every ordering test on NaN is False, which would make
        # the optimal-range check below report "Above Optimal Range".
        if scalar_value is not None and np.isnan(scalar_value):
            scalar_value = None

        if scalar_value is None:
            constraints_info['status'] = 'Value is not available or not numeric; cannot check constraints.'
            return constraints_info

        limits = self.constraint_limits.get(variable_code, {})
        if not limits:
            return constraints_info

        constraints_info['status'] = 'Within limits'

        # Hard limits.
        min_limit = limits.get('min')
        max_limit = limits.get('max')

        if min_limit is not None and scalar_value < min_limit:
            constraints_info['is_violated'] = True
            constraints_info['status'] = 'Violated (Below Minimum)'
            constraints_info['details'].append(f"Value ({scalar_value:.2f}) is below the minimum limit of {min_limit:.2f}.")

        if max_limit is not None and scalar_value > max_limit:
            constraints_info['is_violated'] = True
            constraints_info['status'] = 'Violated (Above Maximum)'
            constraints_info['details'].append(f"Value ({scalar_value:.2f}) is above the maximum limit of {max_limit:.2f}.")

        # Soft limits: accept the range as either a list or a tuple.
        optimal_range = limits.get('optimal_range')
        if optimal_range and isinstance(optimal_range, (list, tuple)) and len(optimal_range) == 2:
            optimal_min, optimal_max = optimal_range

            if not (optimal_min <= scalar_value <= optimal_max):
                constraints_info['is_outside_optimal'] = True

                if scalar_value < optimal_min:
                    status_detail = 'Below Optimal Range'
                    detail_text = f"Value ({scalar_value:.2f}) is below the optimal range of {optimal_min:.2f}-{optimal_max:.2f}."
                else:
                    status_detail = 'Above Optimal Range'
                    detail_text = f"Value ({scalar_value:.2f}) is above the optimal range of {optimal_min:.2f}-{optimal_max:.2f}."

                # A hard-limit violation keeps priority in the headline status.
                if not constraints_info['is_violated']:
                    constraints_info['status'] = status_detail

                constraints_info['details'].append(detail_text)

        if not constraints_info['details']:
            constraints_info['details'].append("Value is within all defined hard and soft constraints.")

        return constraints_info

    
    def _check_for_afternoon_strategy_shift(self, timestamp: datetime) -> Optional[Dict[str, Any]]:
        """
        (DEFINITIVE V3)
        Checks for the "sacrifice growth for safety" strategy by analyzing the AVERAGE state of
        controls and climate within a tight, relevant window around the event.
        """
        print("DEBUG (Afternoon Shift Check V3): Searching for strategy shift pattern...")
        try:
            # This check is only relevant for events in the late afternoon
            if not (timestamp.hour >= 17 and timestamp.hour <= 20):
                return None
                
            # Define a tight 30-minute window around the event to capture the strategy
            window_start = timestamp - timedelta(minutes=15)
            window_end = timestamp + timedelta(minutes=15)
                
            window_data = self.data.loc[window_start:window_end]
            if window_data.empty:
                return None
                
            # Get relevant columns
            co2_inj_col = self.node_to_column.get('uC')
            vent_col = self.node_to_column.get('uV')
            cool_col = self.node_to_column.get('uQc')
            temp_col = self.node_to_column.get('T')
            hum_col = self.node_to_column.get('H')

            required_cols = [co2_inj_col, vent_col, cool_col, temp_col, hum_col]
            if not all(col and col in window_data.columns for col in required_cols):
                return None
                
            # --- Define the Signature of the "Sacrifice Growth for Safety" Strategy ---
            
            # 1. Growth is abandoned: AVERAGE CO2 injection in the window is very low.
            avg_co2_inj = window_data[co2_inj_col].mean()
            is_growth_sacrificed = avg_co2_inj < 0.01 # Injection is effectively off

            # 2. Safety is prioritized: AVERAGE Ventilation or Cooling in the window is high.
            avg_vent = window_data[vent_col].mean()
            avg_cooling = window_data[cool_col].mean()
            is_safety_prioritized = avg_vent > 0.01 or avg_cooling > 1e-5

            # 3. There is a valid reason: AVERAGE Temperature or Humidity is high.
            avg_temp = window_data[temp_col].mean()
            avg_hum = window_data[hum_col].mean()

            temp_optimal_max = self.constraint_limits.get('T', {}).get('optimal_range', [18, 26])[1]
            is_heat_risk = avg_temp > (temp_optimal_max + 1.0) # Must be at least 1°C above optimal

            humidity_critical_threshold = self.config['reasoning_thresholds'].get('humidity_critical_threshold', 85.0)
            is_humidity_risk = avg_hum > humidity_critical_threshold
            
            is_climate_risk = is_heat_risk or is_humidity_risk
            # <<< END OF FIX >>>
            
            print(f"DEBUG (Afternoon Shift v3): Growth Sacrificed? {is_growth_sacrificed}, Safety Prioritized? {is_safety_prioritized}, Genuine Climate Risk? {is_climate_risk}")

            if is_growth_sacrificed and is_safety_prioritized and is_climate_risk:
                confidence_score = 8.5
                return {
                    'confidence': confidence_score,
                    'primary_reason': "This was a deliberate strategic shift from maximizing growth to ensuring crop safety as evening approached.",
                    'causal_driver_type': "Afternoon Strategy Shift (Safety Priority)",
                    'supporting_evidence': {
                        'Growth Abandoned': f"Average CO₂ injection in the window was {avg_co2_inj:.6f}, effectively off.",
                        'Safety Prioritized': f"Average ventilation was {avg_vent:.6f} and cooling was {avg_cooling:.6f}, indicating active climate control.",
                        'Climate Risk Present': f"Average temperature was {avg_temp:.1f}°C and humidity was {avg_hum:.1f}%, indicating potential risk."
                    }
                }
            
            # --- FALLBACK: Time-based recognition for evening queries ---
            # If it's evening time and no specific control activation occurred,
            # still recognize this as a day-to-night transition scenario
            elif timestamp.hour >= 17 and timestamp.hour <= 20:
                confidence_score = 6.0  # Lower confidence since no controls activated
                return {
                    'confidence': confidence_score,
                    'primary_reason': "This time period represents the scheduled day-to-night transition when the controller shifts from growth optimization to maintenance mode.",
                    'causal_driver_type': "Evening Day-to-Night Transition (Scheduled)",
                    'supporting_evidence': {
                        'Time Context': f"The query is about {timestamp.strftime('%H:%M')} which falls within the typical evening transition window (17:00-20:00).",
                        'Strategic Context': "Even if specific controls didn't activate, this represents the controller's transition from daytime growth mode to nighttime maintenance mode.",
                        'Economic Rationale': "As solar radiation decreases in the evening, the optimal strategy shifts from maximizing photosynthesis to minimizing energy costs."
                    }
                }
                
        except Exception as e:
            print(f"Error in _check_for_afternoon_strategy_shift: {e}")
            traceback.print_exc()
            
        return None
    
    def get_states_typically_affected_by_control(self, control_action_node: str) -> List[str]:
        """
        Find the 'State' nodes reachable from a control node in the Knowledge Graph.

        A breadth-first search follows successor edges from
        ``control_action_node`` through nodes typed 'Control', 'Flux',
        'Process', 'Reference' or 'State', to a depth of at most five edges.
        The search does not continue past a 'State' node. This covers both a
        'Reference' start node (e.g. uV_ref) that leads to a 'Control' node
        and a start node that is itself a 'Control'.

        Args:
            control_action_node: Identifier of the starting node.

        Returns:
            The reachable state nodes in breadth-first discovery order, so the
            result is the same on every run for a given graph. An empty list
            is returned when the start node is not in the graph.
        """
        graph = self.kg.G

        if not graph.has_node(control_action_node):
            print(f"DEBUG - get_states_typically_affected_by_control: Node '{control_action_node}' not found in KG.")
            return []

        max_depth = 5  # Bounds the traversal on large or cyclic graphs.
        traversable_types = ('Control', 'Flux', 'Process', 'Reference', 'State')

        # A list (not a set) keeps discovery order; `visited` guarantees each
        # node is queued at most once, so no duplicates can be appended.
        affected_states: List[str] = []
        visited = {control_action_node}
        queue = deque([(control_action_node, graph.nodes[control_action_node].get('type'), 0)])

        while queue:
            current_node, current_type, depth = queue.popleft()

            if depth > max_depth:
                continue

            if current_type == 'State':
                # A state is an end point: what it influences in turn is not
                # an effect of the control action itself.
                affected_states.append(current_node)
                continue

            for neighbor in graph.successors(current_node):
                if neighbor in visited:
                    continue
                neighbor_type = graph.nodes[neighbor].get('type')
                # Only walk through node types that can carry the influence.
                if neighbor_type in traversable_types:
                    visited.add(neighbor)
                    queue.append((neighbor, neighbor_type, depth + 1))

        return affected_states


    def resolve_variable_name(self, variable: str) -> str:
        """
        Map a variable identifier onto a column name of ``self.data``.

        The identifier is tried as given, then upper-cased, lower-cased and
        capitalised. Each spelling is looked up first among the data columns
        and then in ``self.code_to_column``.

        Args:
            variable: Column name or variable code.

        Returns:
            The matching column name, or ``variable`` unchanged when nothing
            matches.
        """
        columns = self.data.columns
        code_map = self.code_to_column

        # Exact spelling first: a column name wins over a code.
        if variable in columns:
            return variable
        if variable in code_map:
            return code_map[variable]

        # Then the case variants, each checked in the same column-then-code order.
        for candidate in (variable, variable.upper(), variable.lower(), variable.capitalize()):
            if candidate in columns:
                return candidate
            if candidate in code_map:
                return code_map[candidate]

        # Nothing matched; hand the caller back what it passed in.
        return variable

    # get_variable_value resolves variable names through resolve_variable_name.
    def get_variable_value(self, data: pd.DataFrame, timestamp: datetime, variable: str) -> Optional[float]:
        """
        Read one variable from ``data`` at, or nearest to, a timestamp.

        The variable is mapped to a column with ``resolve_variable_name``. If
        ``timestamp`` is not an index label, the nearest label of a
        ``DatetimeIndex`` is used instead.

        Args:
            data: Frame to read from.
            timestamp: Requested moment.
            variable: Column name or variable code.

        Returns:
            The stored value, or ``None`` when the index is not a
            ``DatetimeIndex`` (and the timestamp is not an exact label), no
            nearest row exists, the column is missing, or any error occurs
            (a message is printed in those cases).
        """
        try:
            column = self.resolve_variable_name(variable)

            if timestamp in data.index:
                row_label = timestamp
            else:
                # Nearest-neighbour lookup is only attempted on datetime indexes.
                if not isinstance(data.index, pd.DatetimeIndex):
                    print(f"WARNING: DataFrame index is not DatetimeIndex, it's {type(data.index)}")
                    return None

                position = data.index.get_indexer([timestamp], method='nearest')[0]
                # get_indexer reports -1 when nothing could be matched.
                if not 0 <= position < len(data.index):
                    return None
                row_label = data.index[position]

            if column not in data.columns:
                print(f"Warning: Variable '{variable}' (resolved to '{column}') not found in data columns: {list(data.columns)}")
                return None

            return data.loc[row_label, column]

        except KeyError as e:
            print(f"KeyError getting variable '{variable}' value for timestamp {timestamp}: {e}")
            return None
        except Exception as e:
            print(f"Error getting variable '{variable}' value for timestamp {timestamp}: {e}")
            return None

    def _validate_mpc_predictions(self, predicted_violations: list[dict], 
                               actual_data: pd.DataFrame, states_affected: list[str]) -> dict:
        """
        Validates if the predicted violations would have occurred by checking against
        actual historical data. This shows if the control action was effective.
        """
        validation_results = {}
        if actual_data.empty:
            return {'status': 'No actual data available for validation.'}

        for prediction in predicted_violations:
            state_node = prediction['state_node']
            state_col = self.node_to_column.get(state_node)
            if not state_col or state_col not in actual_data.columns:
                continue
            
            state_name = self.node_to_name.get(state_node, state_node)
            actual_series = actual_data[state_col]
            
            # Check if the actual data violated the predicted limit
            limit_value = prediction['projected_violations'][0]['limit_value']
            limit_type = prediction['projected_violations'][0]['limit_type']
            
            actually_violated = False
            if limit_type == 'max' and actual_series.max() > limit_value:
                actually_violated = True
            elif limit_type == 'min' and actual_series.min() < limit_value:
                actually_violated = True

            conclusion = ""
            if not actually_violated:
                conclusion = f"The control action appears **justified and effective**, as the predicted violation for {state_name} did not occur."
            else:
                conclusion = f"The control action was **justified but may have been insufficient**, as {state_name} still violated the limit."
                
            validation_results[state_node] = {
                'predicted_violation_type': limit_type,
                'limit_value': limit_value,
                'actual_outcome': "Violation Prevented" if not actually_violated else "Violation Still Occurred",
                'conclusion': conclusion
            }
        return validation_results
    
    def analyze_mpc_predictive_reasoning(self, control_action_node: str, action_timestamp: datetime,
                                     prediction_horizon_hours: int = 6) -> Dict[str, Any]:
        """
        Explain a control action by what the controller could foresee at the time.

        For every state returned by ``get_states_typically_affected_by_control``
        the trend over the two hours before the action is extrapolated
        linearly, hour by hour, across the prediction horizon. Each projected
        value is checked with ``_check_projected_violations``. Disturbance and
        reference-tracking analyses are delegated to their helpers. When
        historical data exists inside the horizon, the predictions are
        validated against it.

        Args:
            control_action_node: The control action being analysed.
            action_timestamp: When the action was taken.
            prediction_horizon_hours: How far ahead to project, in hours.

        Returns:
            Dict with ``action_timestamp``, ``prediction_horizon_hours``,
            ``predicted_violations``, ``disturbance_impacts``,
            ``trend_projections`` (always an empty list here),
            ``optimization_factors``, ``summary`` and, only when data is
            available inside the horizon, ``prediction_validation``.
        """
        mpc_reasoning = {
            'action_timestamp': action_timestamp,
            'prediction_horizon_hours': prediction_horizon_hours,
            'predicted_violations': [],
            'disturbance_impacts': [],
            'trend_projections': [],
            'optimization_factors': [],
            'summary': ''
        }

        prediction_end = action_timestamp + timedelta(hours=prediction_horizon_hours)

        # 1. ANALYZE PREDICTED CONSTRAINT VIOLATIONS
        states_affected = self.get_states_typically_affected_by_control(control_action_node)

        for state_node in states_affected:
            state_col = self.node_to_column.get(state_node)
            if not state_col or state_col not in self.data.columns:
                continue

            # State value at (or just before) the action time. A failed lookup
            # skips this state, but interpreter-level signals such as
            # KeyboardInterrupt are no longer swallowed.
            try:
                current_state = self.data.asof(action_timestamp)[state_col]
            except Exception:
                continue

            # Trend over the two hours leading up to the action.
            lookback_start = action_timestamp - timedelta(hours=2)
            trend_data = self.data.loc[lookback_start:action_timestamp][state_col]
            if len(trend_data) <= 1:
                continue

            time_diff_hours = (trend_data.index[-1] - trend_data.index[0]).total_seconds() / 3600
            if not time_diff_hours > 0:
                continue

            # Rate of change in units per hour.
            trend_rate = (trend_data.iloc[-1] - trend_data.iloc[0]) / time_diff_hours
            limits_config = self.constraint_limits.get(state_node, {})
            state_name = self.node_to_name.get(state_node, state_node)

            # Project the state forward without any control action and collect
            # every limit the projection would break.
            violations_found = []
            for hours_ahead in range(1, prediction_horizon_hours + 1):
                projected_value = current_state + (trend_rate * hours_ahead)
                projected_time = action_timestamp + timedelta(hours=hours_ahead)
                violations = self._check_projected_violations(
                    projected_value,
                    limits_config,
                    projected_time,
                    state_node
                )
                if violations:
                    violations_found.extend(violations)

            if violations_found:
                mpc_reasoning['predicted_violations'].append({
                    'state_node': state_node,
                    'state_name': state_name,
                    'current_value': current_state,
                    'trend_rate_per_hour': trend_rate,
                    'projected_violations': violations_found,
                    'earliest_violation_time': min(v['violation_time'] for v in violations_found),
                    'reasoning': f"Without control action, {state_name} "
                            f"trending at {trend_rate:.2f}/hr would violate constraints"
                })

        # 2. ANALYZE DISTURBANCE IMPACTS
        mpc_reasoning['disturbance_impacts'] = self._analyze_disturbance_predictions(
            action_timestamp,
            prediction_horizon_hours,
            control_action_node
        )

        # 3. ANALYZE REFERENCE TRACKING vs CONSTRAINTS
        mpc_reasoning['optimization_factors'] = self._analyze_reference_tracking_conflicts(
            action_timestamp,
            control_action_node,
            prediction_horizon_hours
        )

        # 4. VALIDATE AGAINST WHAT ACTUALLY HAPPENED
        # Guard the slice: label-slicing an empty frame with timestamps can
        # raise when its index is not datetime-based.
        if self.data.empty:
            actual_data_in_horizon = pd.DataFrame()
        else:
            actual_data_in_horizon = self.data.loc[action_timestamp:prediction_end]

        if not actual_data_in_horizon.empty:
            mpc_reasoning['prediction_validation'] = self._validate_mpc_predictions(
                mpc_reasoning['predicted_violations'],
                actual_data_in_horizon,
                states_affected
            )

        mpc_reasoning['summary'] = self._generate_mpc_reasoning_summary(mpc_reasoning)

        return mpc_reasoning

    def _check_for_simultaneous_driver_reaction(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (NEW EXPERT V2 - Robust Significance Check)
        Checks if a control action was a direct, simultaneous reaction to a change
        in its primary external driver using absolute, context-aware thresholds.
        """
        print("DEBUG (Simultaneous Reaction Expert V2): Checking for lock-step driver changes...")
        
        try:
            event_idx = self.data.index.get_loc(self.find_nearest_timestamp(timestamp))
            if event_idx == 0: return None
            data_at_event = self.data.iloc[event_idx]
            data_before_event = self.data.iloc[event_idx - 1]
        except Exception as e:
            print(f"DEBUG (Simultaneous Reaction V2): Error getting data - {e}")
            return None

        control_to_driver_map = { 'uQc': 'Qrad', 'uQh': 'Tout', 'uC': 'Qrad', 'uV': 'Hout' }
        driver_node = control_to_driver_map.get(control_action_node)
        if not driver_node: return None

        control_col = self.node_to_column.get(control_action_node)
        driver_col = self.node_to_column.get(driver_node)

        if not all(pd.notna(data_at_event.get(c)) and pd.notna(data_before_event.get(c)) for c in [control_col, driver_col]):
            return None

        control_change = data_at_event[control_col] - data_before_event[control_col]
        driver_change = data_at_event[driver_col] - data_before_event[driver_col]

        # --- THE DEFINITIVE FIX: Use robust, absolute thresholds for significance ---
        driver_thresholds = {
            'Qrad': 50.0,  # A change of 50 W/m^2 in 5 mins is always significant.
            'Tout': 1.0,   # A change of 1.0 °C in 5 mins is significant.
            'Hout': 5.0,   # A change of 5.0 %RH in 5 mins is significant.
        }
        significance_threshold = driver_thresholds.get(driver_node, 999) # Default to a high number
        is_driver_change_significant = abs(driver_change) > significance_threshold
        # --- END OF FIX ---

        # For cooling/heating, the reaction should be in the same direction as the driver change.
        is_correlated_change = np.sign(control_change) == np.sign(driver_change)

        print(f"[DEBUG V2] Driver: {driver_node}, Change: {driver_change:.2f}, Is Significant? {is_driver_change_significant} (Threshold: >{significance_threshold})")
        print(f"[DEBUG V2] Control: {control_action_node}, Change: {control_change:.5f}, Is Correlated? {is_correlated_change}")

        if is_driver_change_significant and is_correlated_change:
            confidence_score = 9.5
            driver_name = self.node_to_name.get(driver_node, driver_node)
            action_name = self.node_to_name.get(control_action_node, control_action_node)
            
            print(f"SUCCESS (Simultaneous Reaction Expert V2): Found lock-step reaction. Score: {confidence_score}")
            return {
                'confidence': confidence_score,
                'primary_reason': f"The sudden drop in the {action_name} was an immediate and correct reaction to a simultaneous drop in its primary driver, the {driver_name}.",
                'causal_driver_type': "Simultaneous Driver Reaction",
                'supporting_evidence': {
                    'The External Event': f"At {timestamp.strftime('%H:%M')}, the {driver_name} dropped sharply from {data_before_event[driver_col]:.2f} to {data_at_event[driver_col]:.2f}, likely due to cloud cover.",
                    'The Controller\'s Response': f"The controller reacted instantly, reducing the {action_name} from {data_before_event[control_col]:.5f} to {data_at_event[control_col]:.5f} to match the reduced heat load.",
                    'Conclusion': "This demonstrates a highly responsive and well-tuned controller that efficiently adjusts its actions in real-time to external environmental changes, saving energy by not over-cooling when the heat load is gone."
                }
            }
            
        return None

    def _check_projected_violations(self, projected_value: float, limits_config: dict, 
                                violation_time: datetime, state_node: str) -> List[Dict]:
        """Check if a projected value would violate constraints."""
        violations = []
        
        for limit_type in ['max_node', 'min_node']:
            if limit_type not in limits_config:
                continue
                
            limit_node = limits_config[limit_type]
            if not self.kg.has_node(limit_node):
                continue
                
            limit_value = self.kg.G.nodes[limit_node].get('value')
            if limit_value is None:
                continue
            
            is_violation = False
            severity = 'none'
            
            if limit_type == 'max_node' and projected_value > limit_value:
                is_violation = True
                severity = 'high' if projected_value > (limit_value + self.constraint_tolerance) else 'medium'
            elif limit_type == 'min_node' and projected_value < limit_value:
                is_violation = True
                severity = 'high' if projected_value < (limit_value - self.constraint_tolerance) else 'medium'
            
            if is_violation:
                violations.append({
                    'limit_type': limit_type.replace('_node', ''),
                    'limit_value': limit_value,
                    'projected_value': projected_value,
                    'violation_magnitude': abs(projected_value - limit_value),
                    'violation_time': violation_time,
                    'severity': severity
                })
        
        return violations

    def analyze_lagrangian_state(self, lagrangian_nodes: list, date: datetime.date) -> Dict[str, Any]:
        """
        Interpret the behaviour of Lagrangian multipliers over one calendar day.

        Nodes present in the built-in expert table receive their fixed expert
        explanation. Every other node is classified from the mean and standard
        deviation of its data column for that day as stable-negative,
        stable-positive, flat near zero, or (fallback) active and dynamic.

        Args:
            lagrangian_nodes: Identifiers of the Lagrangian nodes to analyse.
            date: Day to analyse. A datetime-like value (anything exposing a
                callable ``date`` attribute) is reduced to its calendar day.

        Returns:
            ``{'results': {node: entry}}`` where each entry holds either
            ``node_name`` and ``interpretation`` or an ``error`` message, or
            ``{'error': ...}`` when the day cannot be sliced or has no rows.
        """
        analysis = {'results': {}}
        expert_explanations = {
            'B_ieq': (
                "This is the mathematical representation of the controller's primary objective: **profit maximization through biomass growth**. "
                "A consistently negative value for the biomass Lagrangian signifies a strong, persistent incentive to increase the final crop yield. The magnitude of this signal represents the marginal economic value of each additional unit of biomass. "
                "This is the **root driver** that justifies aggressive daytime strategies, as the future profit outweighs the immediate costs."
            ),
            'B_eq': (
                "This extreme spike is a numerical artifact that reveals a key aspect of the MPC's internal model. It signifies the moment of the **discontinuous switch in the biomass dynamic model** from its daytime to its nighttime formulation. "
                "At this moment, the photosynthesis term in the growth model is abruptly set to zero. This creates an instantaneous, massive residual in the equality constraint for the biomass dynamics, which manifests as this spike in the corresponding Lagrangian multiplier. "
                "It is the optimizer's way of handling the hard switch between the 'growth' and 'maintenance' models."
            ),
            'uC_ieq': (
                "This saturated signal is the optimizer's definitive **demand signal**. A negative Lagrangian on a control variable indicates that the optimizer wants to **increase** the control action but is being constrained by a limit. "
                "In this case, the sustained saturation at the lower limit means that for the entire daytime period, the optimizer wanted to inject **even more CO₂** than it was commanding, but was constrained by the physical maximum capacity of the injection hardware or a pre-set upper limit. "
                "This is the signature of an extremely aggressive growth strategy where the economic value of biomass was so high that the optimizer's ideal action was to run the CO₂ system at 100% capacity."
            ),
            'uQh_ieq': (
                "This saturated signal is the optimizer's definitive **demand signal**. A negative Lagrangian on a control variable means the optimizer wants to **increase** the action but is constrained by an upper limit. "
                "In this case, the sustained negative value means the optimizer wanted to apply **even more heating** but was already running the heating system at its maximum capacity. This indicates the greenhouse was experiencing severe heat loss, likely due to a very cold night or unmodeled factors like high wind."
            ),
            'uV_ieq': (
                "This saturated signal is the optimizer's definitive **demand signal**. A negative Lagrangian on a control variable means the optimizer wants to **increase** the action but is constrained by an upper limit. "
                "In this case, the sustained negative value means the optimizer wanted to **open the vents even wider** but they were already at their maximum opening. This signifies a strong, persistent need for either cooling or dehumidification that was being met as aggressively as possible with the ventilation system."
            ),
        }

        # A datetime/Timestamp does not compare equal to the plain dates held
        # in ``index.date``, so reduce datetime-like input to its calendar day.
        # Plain ``date`` objects have no ``date`` attribute and pass through.
        target_day = date.date() if callable(getattr(date, 'date', None)) else date

        try:
            day_data = self.data[self.data.index.date == target_day]
            if day_data.empty:
                return {'error': 'No data for the specified day.'}
        except Exception as e:
            return {'error': f'Error slicing data: {e}'}

        for lg_node in lagrangian_nodes:
            # Expert explanations take precedence over the statistical analysis.
            if lg_node in expert_explanations:
                analysis['results'][lg_node] = {
                    'node_name': self.node_to_name.get(lg_node, lg_node),
                    'interpretation': expert_explanations[lg_node]
                }
                continue

            # Fallback: classify the signal from its statistics for the day.
            lg_col = self.node_to_column.get(lg_node)
            if not lg_col or lg_col not in day_data.columns:
                analysis['results'][lg_node] = {'error': 'Data column not found.'}
                continue

            series = day_data[lg_col]
            mean_val = series.mean()
            std_dev = series.std()

            # With no valid samples the mean is NaN and every comparison below
            # is False, which used to be reported as "active and dynamic".
            if pd.isna(mean_val):
                analysis['results'][lg_node] = {'error': 'No valid data for the specified day.'}
                continue

            # Find the state/control variable whose Lagrangian set contains this node.
            related_var = next(
                (key for key, val in self.lagrangian_nodes.items() if lg_node in val.values()),
                None
            )
            related_var_name = self.node_to_name.get(related_var, "the associated variable")

            # "Stable" means the spread is below half of the mean magnitude;
            # only evaluated when the mean is clearly non-zero (no zero division).
            is_nonzero = abs(mean_val) > 1e-9
            is_stable = is_nonzero and (std_dev / abs(mean_val)) < 0.5

            if is_stable and mean_val < 0:
                # Consistently and significantly negative.
                if related_var == 'B':  # Special case for biomass
                    interpretation = (
                        "This is the mathematical representation of the controller's primary objective: **profit maximization through biomass growth**. "
                        "A consistently negative value for the biomass Lagrangian signifies a strong, persistent incentive to increase the final crop yield. The magnitude of this signal represents the marginal economic value of each additional unit of biomass. "
                        "This is the **root driver** that justifies aggressive daytime strategies like high CO₂ injection, as the future profit from the resulting growth outweighs the immediate operational costs."
                    )
                else:
                    interpretation = (
                        f"This indicates the controller's primary objective was to prevent the **{related_var_name}** from falling below its **LOWER limit**. "
                        "The consistently negative signal shows this was a persistent pressure, and the controller was actively working to keep the state above its minimum safe value."
                    )
            elif is_stable and mean_val > 0:
                # Consistently and significantly positive.
                interpretation = (
                    f"This indicates the controller's primary objective was to prevent the **{related_var_name}** from exceeding its **UPPER limit**. "
                    "The consistently positive signal shows this was a persistent pressure, and the controller was actively working to keep the state below its maximum safe value."
                )
            elif abs(mean_val) < 1e-9 and std_dev < 1e-9:
                # Flat and near zero.
                interpretation = (
                    f"The **{related_var_name}** system was **unconstrained**. This is the mathematical proof that the variable was operating comfortably within its limits. "
                    "The controller had full freedom to act, but the optimal strategy did not require any intervention for this variable."
                )
            else:
                # Anything else, e.g. a highly oscillatory signal.
                interpretation = (
                    f"The signal for the **{related_var_name}** was **active and dynamic**. This means the controller was constantly adjusting its effort to manage this variable in response to changing conditions, balancing it against other objectives."
                )

            analysis['results'][lg_node] = {
                'node_name': self.node_to_name.get(lg_node, lg_node),
                'interpretation': interpretation
            }

        return analysis


    def format_lagrangian_state_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Render a Lagrangian analysis result as plain text for the LLM prompt.

        A top-level ``error`` key short-circuits to a single error line.
        Otherwise a header is followed, per node, by either its error line or
        its signal name and interpretation; all lines are joined by newlines.
        """
        if 'error' in analysis_result:
            return f"Error: {analysis_result['error']}"

        def render_entry(node_id, entry):
            # Per-node errors get one line; successful entries get two.
            if 'error' in entry:
                return [f"\n- For {node_id}: {entry['error']}"]
            return [
                f"\n**Signal:** {entry['node_name']} ({node_id})",
                f"\n**Implication:** {entry['interpretation']}",
            ]

        lines = ["--- Analysis of Controller Objective Function ---"]
        per_node = analysis_result.get('results', {})
        for node_id, entry in per_node.items():
            lines.extend(render_entry(node_id, entry))

        return "\n".join(lines)


    def _get_states_affected_by_disturbance(self, disturbance_node: str) -> List[str]:
        """
        Identify which state variables are affected by a disturbance node.

        PCMCI results are consulted first: a variable counts as affected when,
        for some lag >= 1, ``graph[lag][target, source]`` is ``'-->'`` and the
        matching p-value is below ``alpha`` (default 0.05), and the variable is
        a knowledge-graph node of type ``'State'``. If that yields nothing, the
        direct knowledge-graph successors of type ``'State'`` are used.

        Args:
            disturbance_node: Identifier of the disturbance node.

        Returns:
            Sorted list of affected state node identifiers (possibly empty).
        """
        affected_states = set()

        # 1. Prioritize PCMCI results. All three arrays are required; partial
        #    results fall through to the knowledge graph instead of raising.
        results = self.pcmci_results
        required_keys = ('graph', 'p_matrix', 'var_names')
        if results and all(key in results for key in required_keys):
            dist_idx = self.pcmci_var_to_idx.get(disturbance_node)
            if dist_idx is not None:
                causal_graph = results['graph']
                p_matrix = results['p_matrix']
                var_names = results['var_names']
                alpha = results.get('alpha', 0.05)

                for lag in range(1, len(causal_graph)):
                    lag_graph = causal_graph[lag]
                    lag_p = p_matrix[lag]
                    if dist_idx >= lag_graph.shape[1]:
                        continue  # source column out of bounds for this lag
                    for target_idx, target_var_name in enumerate(var_names):
                        if target_idx >= lag_graph.shape[0]:
                            break  # every later target is out of bounds too
                        if lag_graph[target_idx, dist_idx] != '-->':
                            continue
                        if not lag_p[target_idx, dist_idx] < alpha:
                            continue
                        # A PCMCI variable may be absent from the knowledge
                        # graph; skip it rather than raising KeyError.
                        if (self.kg.has_node(target_var_name) and
                                self.kg.G.nodes[target_var_name].get('type') == 'State'):
                            affected_states.add(target_var_name)

        # 2. Fallback to Knowledge Graph
        if not affected_states and self.kg.has_node(disturbance_node):
            for successor in self.kg.G.successors(disturbance_node):
                if self.kg.G.nodes[successor].get('type') == 'State':
                    affected_states.add(successor)

        # Sorted so the result does not depend on set iteration order.
        return sorted(affected_states)
    
    def _get_causal_strength(self, source_node: str, target_node: str) -> float:
        """
        Return the strongest significant causal strength from source to target.

        For every lag >= 1, a link counts when ``graph[lag][target, source]``
        is ``'-->'`` and its p-value is below ``alpha`` (default 0.05). The
        signed ``val_matrix`` entry with the largest absolute value wins.

        Args:
            source_node: Identifier of the candidate cause.
            target_node: Identifier of the candidate effect.

        Returns:
            The signed strength, or ``0.0`` when PCMCI results are missing or
            incomplete, either node is unknown, or no significant link exists.
        """
        results = self.pcmci_results
        # All three arrays are read below, so require all of them up front
        # instead of raising KeyError on partially populated results.
        required_keys = ('graph', 'val_matrix', 'p_matrix')
        if not results or any(key not in results for key in required_keys):
            return 0.0

        source_idx = self.pcmci_var_to_idx.get(source_node)
        target_idx = self.pcmci_var_to_idx.get(target_node)
        if source_idx is None or target_idx is None:
            return 0.0

        causal_graph = results['graph']
        val_matrix = results['val_matrix']
        p_matrix = results['p_matrix']
        alpha = results.get('alpha', 0.05)

        strongest = 0.0
        for lag in range(1, len(causal_graph)):
            lag_graph = causal_graph[lag]
            # Check array bounds before indexing.
            if target_idx >= lag_graph.shape[0] or source_idx >= lag_graph.shape[1]:
                continue
            if lag_graph[target_idx, source_idx] != '-->':
                continue
            if not p_matrix[lag][target_idx, source_idx] < alpha:
                continue

            candidate = val_matrix[lag][target_idx, source_idx]
            if abs(candidate) > abs(strongest):
                strongest = candidate

        return strongest
    
    def _analyze_disturbance_predictions(self, action_timestamp: datetime,
                                    horizon_hours: int, control_action_node: str) -> List[Dict]:
        """
        Estimate how recent disturbance trends would move the affected states.

        For each knowledge-graph node of type ``'Disturbance'`` with a data
        column, the mean of the last three step-to-step changes in the six
        hours before ``action_timestamp`` is taken as the recent trend. For
        each affected state with a non-zero causal strength the impact is
        ``recent_trend * causal_strength * horizon_hours``.

        Args:
            action_timestamp: End of the six-hour lookback window.
            horizon_hours: Horizon the trend is extrapolated over.
            control_action_node: Accepted for interface compatibility; not
                used by this method.

        Returns:
            One dict per (disturbance, affected state) pair describing the
            predicted impact. Disturbances whose trend cannot be computed
            (NaN) are skipped.
        """
        disturbance_impacts = []

        # Look for disturbance variables in the system
        disturbance_nodes = [node for node in self.kg.G.nodes()
                             if self.kg.G.nodes[node].get('type') == 'Disturbance']

        for dist_node in disturbance_nodes:
            dist_col = self.node_to_column.get(dist_node)
            if not dist_col or dist_col not in self.data.columns:
                continue

            # Analyze the recent disturbance pattern
            lookback_start = action_timestamp - timedelta(hours=6)
            dist_data = self.data.loc[lookback_start:action_timestamp][dist_col]
            if len(dist_data) <= 2:
                continue

            # Simple trend analysis: average of the most recent changes
            recent_trend = dist_data.diff().tail(3).mean()
            if pd.isna(recent_trend):
                # No usable samples: a NaN trend would otherwise surface as a
                # "nan" impact that is labelled as a decrease.
                continue
            current_level = dist_data.iloc[-1]
            dist_name = self.node_to_name.get(dist_node, dist_node)

            # Find which states this disturbance affects using KG or PCMCI
            for state_node in self._get_states_affected_by_disturbance(dist_node):
                causal_strength = self._get_causal_strength(dist_node, state_node)
                if not causal_strength:
                    continue

                state_name = self.node_to_name.get(state_node, state_node)
                predicted_impact = recent_trend * causal_strength * horizon_hours
                # NOTE: an impact of exactly zero is labelled 'decrease'.
                direction = 'increase' if predicted_impact > 0 else 'decrease'

                disturbance_impacts.append({
                    'disturbance_node': dist_node,
                    'disturbance_name': dist_name,
                    'affected_state': state_node,
                    'affected_state_name': state_name,
                    'current_disturbance_level': current_level,
                    'recent_trend': recent_trend,
                    'predicted_impact_magnitude': abs(predicted_impact),
                    'impact_direction': direction,
                    'causal_strength': causal_strength,
                    'reasoning': f"Disturbance {dist_name} "
                            f"trending {recent_trend:+.2f} would cause "
                            f"{state_name} to "
                            f"{direction} "
                            f"by ~{abs(predicted_impact):.2f} over {horizon_hours}h"
                })

        return disturbance_impacts

    def _analyze_reference_tracking_conflicts(self, action_timestamp: datetime,
                                            control_action_node: str, horizon_hours: int) -> List[Dict]:
        """
        Find states whose setpoint tracking was compromised at a given time.

        For every state typically affected by the control action, each
        matching setpoint node with a ``value`` is compared against the state
        value as of ``action_timestamp``. An entry is reported when the
        setpoint conflicts with the state's constraint limits or the tracking
        error exceeds ``self.constraint_tolerance``.

        Args:
            action_timestamp: Time at which the state value is sampled.
            control_action_node: Control node whose affected states are checked.
            horizon_hours: Accepted for interface compatibility; not used by
                this method.

        Returns:
            One dict per reported (state, setpoint) pair. Pairs whose
            evaluation raises an ordinary exception are skipped (best effort).
        """
        optimization_factors = []

        # Find setpoint/reference nodes for states affected by this control
        states_affected = self.get_states_typically_affected_by_control(control_action_node)

        for state_node in states_affected:
            # NOTE(review): this is a substring match, so a one-letter state id
            # such as 't' is contained in the word 'setpoint' itself and matches
            # every setpoint node - confirm the node naming convention.
            setpoint_nodes = [node for node in self.kg.G.nodes()
                              if 'setpoint' in node.lower() and state_node.lower() in node.lower()]

            for setpoint_node in setpoint_nodes:
                if not self.kg.has_node(setpoint_node):
                    continue

                setpoint_value = self.kg.G.nodes[setpoint_node].get('value')
                if setpoint_value is None:
                    continue

                # Get current state value
                state_col = self.node_to_column.get(state_node)
                if not state_col or state_col not in self.data.columns:
                    continue

                try:
                    current_state = self.data.asof(action_timestamp)[state_col]
                    tracking_error = abs(current_state - setpoint_value)

                    # Check if constraints would prevent reaching setpoint
                    limits_config = self.constraint_limits.get(state_node, {})
                    constraint_prevents_tracking = self._check_constraint_setpoint_conflict(
                        setpoint_value, limits_config
                    )

                    if constraint_prevents_tracking or tracking_error > self.constraint_tolerance:
                        state_name = self.node_to_name.get(state_node, state_node)
                        optimization_factors.append({
                            'state_node': state_node,
                            'state_name': state_name,
                            'setpoint_node': setpoint_node,
                            'setpoint_value': setpoint_value,
                            'current_value': current_state,
                            'tracking_error': tracking_error,
                            'constraint_conflict': constraint_prevents_tracking,
                            'reasoning': f"MPC balanced tracking {state_name} "
                                    f"setpoint ({setpoint_value:.2f}) with constraint limits"
                        })
                except Exception as exc:
                    # Best effort: skip this pair but leave a trace. Unlike a
                    # bare ``except``, this lets KeyboardInterrupt/SystemExit
                    # propagate.
                    logging.getLogger(__name__).debug(
                        "Skipping setpoint %r for state %r: %s",
                        setpoint_node, state_node, exc
                    )
                    continue

        return optimization_factors

    def _check_for_sensor_fault_event(self, timestamp: datetime, control_action_node: str) -> Optional[Dict[str, Any]]:
        """
        Check for evidence of a sensor failure or data anomaly at ``timestamp``.

        The data row nearest to ``timestamp`` is compared with the row just
        before it against a table of known fault signatures (keyed 'H', 'T',
        'C'). A signature scores 5.0 for a one-step change larger than its
        threshold plus 1.5 for each of its two Lagrangians whose absolute
        value exceeds 10.0; it is reported only when the score is above 4,
        i.e. only when the large change itself is present.

        Args:
            timestamp: Time of the event to diagnose.
            control_action_node: Node the query focuses on. Its signature (if
                any) is checked first; the others are checked only when that
                yields nothing.

        Returns:
            The highest-confidence diagnosis dict, a "No Specific Fault
            Detected" dict, or ``{'error': ...}`` when the rows cannot be
            retrieved or the event is the first data point.
        """
        print(f"DEBUG (Fault Diagnosis V3): Diagnosing potential faults at {timestamp}, focusing on '{control_action_node}'.")

        try:
            event_idx = self.data.index.get_loc(self.find_nearest_timestamp(timestamp))
            if event_idx == 0:
                return {'error': "Cannot analyze fault at the first data point."}
            data_at_event = self.data.iloc[event_idx]
            data_before_event = self.data.iloc[event_idx - 1]
        except Exception as e:
            return {'error': f"Could not retrieve data for fault analysis: {e}"}

        # --- Define all known fault signatures ---
        fault_signatures = {
            'H': {
                'name': 'Humidity', 'threshold': 50.0,
                'lagrangian1': 'uQh_ieq', 'lagrangian2': 'uC_ieq'
            },
            'T': {
                'name': 'Temperature', 'threshold': 10.0,
                'lagrangian1': 'uQh_ieq', 'lagrangian2': 'uQc_ieq'
            },
            'C': {
                'name': 'CO₂ Concentration', 'threshold': 200.0,  # A >200ppm jump/drop in 5 mins is impossible
                'lagrangian1': 'uC_ieq',  # Primary reaction: CO₂ injection Lagrangian
                'lagrangian2': 'uQh_ieq'  # Secondary reaction: Heating system is often linked
            }
        }

        potential_faults = []

        def check_signature(node_to_check, fault_info):
            """Score one signature and record it when confident enough."""
            try:
                # Evidence 1: the implausibly large one-step change.
                # Missing columns read as 0, i.e. "no change".
                sensor_col = self.node_to_column.get(node_to_check)
                change = data_at_event.get(sensor_col, 0) - data_before_event.get(sensor_col, 0)
                e1_impossible_change = abs(change) > fault_info['threshold']

                # Evidence 2 & 3: extreme optimizer reaction.
                lg1_col = self.node_to_column.get(fault_info['lagrangian1'])
                lg2_col = self.node_to_column.get(fault_info['lagrangian2'])
                lg1_val = data_at_event.get(lg1_col, 0)
                lg2_val = data_at_event.get(lg2_col, 0)
                e2_lg1_saturated = abs(lg1_val) > 10.0
                e3_lg2_saturated = abs(lg2_val) > 10.0

                # Weighted confidence: the data jump dominates, so the > 4
                # cut-off can only be passed when it is present.
                confidence = (5.0 * e1_impossible_change) + (1.5 * e2_lg1_saturated) + (1.5 * e3_lg2_saturated)
                if confidence <= 4:
                    return

                potential_faults.append({
                    'confidence': confidence,
                    'fault_type': f"{fault_info['name']} Sensor/Data Anomaly",
                    'explanation': f"The event was triggered by a catastrophic anomaly in the {fault_info['name']} data, which reported a physically impossible value.",
                    'evidence': {
                        "Root Cause (Sensor Data)": f"The data for {fault_info['name']} showed an impossible change of {change:.1f} units in a single 5-minute step.",
                        "Optimizer Reaction (System Shock)": f"The optimizer immediately responded to this nonsensical data by saturating its control outputs, proven by the extreme spikes in the control Lagrangians ({fault_info['lagrangian1']}: {lg1_val:.2E}, {fault_info['lagrangian2']}: {lg2_val:.2E}).",
                        "System Action (Defensive Purge)": "The system's subsequent actions were a safety response to this data fault, not a normal climate control strategy."
                    }
                })
            except Exception as e:
                print(f"DEBUG - Error during {fault_info['name']} fault check: {e}")

        # 1. First, check the fault related to the user's primary query.
        if control_action_node in fault_signatures:
            check_signature(control_action_node, fault_signatures[control_action_node])

        # 2. If nothing was found for the primary node, check all others.
        #    This helps catch a fault in one system causing an anomaly in another.
        if not potential_faults:
            for node, info in fault_signatures.items():
                if node != control_action_node:
                    check_signature(node, info)

        # --- Final Diagnosis ---
        if not potential_faults:
            return {
                'fault_type': "No Specific Fault Detected",
                'explanation': f"While the event at {timestamp.strftime('%H:%M')} was unusual, it does not match any known fault signatures in my diagnostic system. It may be an unmodeled physical effect or a different type of anomaly.",
                'evidence': {}
            }

        # Select the fault with the highest confidence score
        return max(potential_faults, key=lambda fault: fault['confidence'])

    def _analyze_control_action_over_range(self, control_action_node: str, start_dt: datetime, end_dt: datetime, other_nodes: List[str]) -> Dict[str, Any]:
        """
        (FINAL, COMPLETE VERSION)
        Analyzes sustained strategies and calculates the relevant "Observed Effect"
        for each scenario, providing a complete picture of the action and its outcome.
        """
        print(f"DEBUG - Performing AGGREGATE analysis for '{control_action_node}' from {start_dt} to {end_dt}.")
        
        analysis_result = {
            'control_action_node': control_action_node,
            'action_name': self.node_to_name.get(control_action_node, control_action_node),
            'start_time': start_dt,
            'end_time': end_dt,
            'primary_reason': "The reason for the sustained action could not be determined from aggregate data.",
            'causal_driver_type': "Unknown Strategy",
            'supporting_evidence': {},
            'observed_effects': {} # Initialize the key
        }
        primary_goal_identified = False
        try:
            window_data = self.data.loc[start_dt:end_dt]
            if window_data.empty:
                analysis_result['error'] = "No data available in the specified range."
                return analysis_result
        except Exception as e:
            analysis_result['error'] = f"Error slicing data for range analysis: {e}"
            return analysis_result

        # --- STRATEGY 1: Opportunistic Profit Maximization (uC) ---
        if control_action_node == 'uC':
            biomass_lg_col = self.node_to_column.get('B_ieq')
            qrad_col = self.node_to_column.get('Qrad')
            co2_inj_col = self.node_to_column.get('uC')  # Get the actual CO2 injection column
            primary_goal_identified = True
            
            # *** CRITICAL CHECK: Verify CO2 injection was actually active ***
            avg_co2_inj = window_data[co2_inj_col].abs().mean()
            max_co2_inj = window_data[co2_inj_col].abs().max()
            co2_active_threshold = 1e-6  # Realistic threshold for active CO2 injection
            
            print(f"DEBUG (uC Range Analysis): avg_co2_inj={avg_co2_inj:.2E}, max_co2_inj={max_co2_inj:.2E}, threshold={co2_active_threshold:.2E}")
            
            if avg_co2_inj < co2_active_threshold and max_co2_inj < co2_active_threshold:
                # CO2 injection was NOT actually active
                analysis_result.update({
                    'primary_reason': "CO2 injection was not activated during this period.",
                    'causal_driver_type': "No Action Taken (Zero CO2 Injection)",
                    'supporting_evidence': {
                        'Actual CO2 Injection': f"The CO2 injection system was essentially inactive (avg: {avg_co2_inj:.2E}, max: {max_co2_inj:.2E}).",
                        'Possible Reasons': "This could be due to nighttime conditions (no photosynthesis), low light levels, or economic constraints."
                    }
                })
                analysis_result['observed_effects']['summary'] = "No significant CO2 injection activity was detected during this period."
                return analysis_result
            
            avg_biomass_lg = window_data[biomass_lg_col].mean()
            avg_qrad = window_data[qrad_col].mean()
            
            if avg_biomass_lg < -1e-9 and avg_qrad > self.config['reasoning_thresholds']['Qrad_photosynthesis']:
                analysis_result.update({
                    'primary_reason': "This was a deliberate profit-maximization strategy, sustained to capitalize on ideal growing conditions.",
                    'causal_driver_type': "Opportunistic Action (Profit Maximization)",
                    'supporting_evidence': {
                        'Optimizer\'s Objective': f"The Biomass Lagrangian was consistently negative (avg: {avg_biomass_lg:.2E}), proving the goal was to maximize final crop yield.",
                        'Favorable Conditions': f"This was viable due to high solar radiation (avg: {avg_qrad:.2f} W/m²)."
                    }
                })
                
                # ** CALCULATE THE OBSERVED EFFECT **
                bio_ref_col = self.node_to_column.get('B_ref')
                start_biomass = window_data[bio_ref_col].iloc[0]
                end_biomass = window_data[bio_ref_col].iloc[-1]
                change = end_biomass - start_biomass
                analysis_result['observed_effects']['summary'] = f"As a direct result of this strategy, the projected biomass increased by {change:.2f} units (from {start_biomass:.2f} to {end_biomass:.2f}) during this period."
                return analysis_result

        # --- STRATEGY 2: Defensive Heating (uQh) ---
        elif control_action_node == 'uQh':
            tieq_lg_col = self.node_to_column.get('T_ieq')
            hieq_lg_col = self.node_to_column.get('H_ieq')
            tout_col = self.node_to_column.get('Tout')
            heat_ref_col = self.node_to_column.get('uQh')  # Get the actual heating column
            primary_goal_identified = True
            
            # *** CRITICAL CHECK: Verify heating was actually active ***
            avg_heat_ref = window_data[heat_ref_col].abs().mean()
            max_heat_ref = window_data[heat_ref_col].abs().max()
            # Use a more realistic threshold - heating values should be at least 1e-6 to be considered "active"
            # The tiny values like 1e-9 are essentially noise/numerical zeros
            heating_active_threshold = 1e-6  # 0.000001 is still tiny but distinguishes real heating from zero
            
            print(f"DEBUG (uQh Range Analysis): avg_heat_ref={avg_heat_ref:.2E}, max_heat_ref={max_heat_ref:.2E}, threshold={heating_active_threshold:.2E}")
            
            if avg_heat_ref < heating_active_threshold and max_heat_ref < heating_active_threshold:
                # Heating was NOT actually active - this is the key issue!
                analysis_result.update({
                    'primary_reason': "Heating was not activated during this period. In summer conditions, the MPC likely defended against temperature drops by reducing cooling instead.",
                    'causal_driver_type': "No Action Taken (Zero Heating)",
                    'supporting_evidence': {
                        'Actual Heating': f"The heating system was essentially inactive (avg: {avg_heat_ref:.2E}, max: {max_heat_ref:.2E}).",
                        'Alternative Strategy': "The controller may have used cooling reduction or other passive strategies to manage temperature.",
                        'Season Context': "Summer conditions mean active heating is rarely needed; the system relies on modulating cooling/ventilation instead."
                    }
                })
                # Check if cooling was reduced instead
                cool_ref_col = self.node_to_column.get('uQc')
                if cool_ref_col in window_data.columns:
                    avg_cooling = window_data[cool_ref_col].mean()
                    analysis_result['observed_effects']['summary'] = f"Instead of heating, the controller likely reduced cooling (avg: {avg_cooling:.2E}) to defend against temperature drops during this period."
                else:
                    analysis_result['observed_effects']['summary'] = "No significant heating activity was detected. The controller used alternative passive strategies."
                return analysis_result
            
            # Calculate the average "effort" for each constraint
            avg_abs_tieq_lg = window_data[tieq_lg_col].abs().mean()
            avg_abs_hieq_lg = window_data[hieq_lg_col].abs().mean()
            
            # --- <<< THE DEFINITIVE FIX IS HERE >>> ---
            # Compare the magnitude of the problems to find the true priority.
            # We add a small epsilon to avoid division by zero.
            epsilon = 1e-12
            if avg_abs_tieq_lg > avg_abs_hieq_lg + epsilon:
                # Temperature is the clear priority
                analysis_result.update({
                    'primary_reason': "This was a sustained defensive heating strategy to prevent the indoor temperature from violating the minimum safety limit.",
                    'causal_driver_type': "Defensive Action (Constraint Avoidance)",
                    'supporting_evidence': {
                        'Optimizer\'s Objective': f"The Temperature Lagrangian was consistently and strongly negative (avg abs value: {avg_abs_tieq_lg:.2E}), proving the controller was fighting to keep the temperature from falling. This was a much stronger signal than the humidity constraint (avg abs value: {avg_abs_hieq_lg:.2E}).",
                        'Environmental Pressure': f"This was necessary due to persistent cold outside (average Tout: {window_data[tout_col].mean():.2f}°C)."
                    }
                })
                # Calculate observed effect for temperature
                temp_col = self.node_to_column.get('T')
                # (You can use your robust V11 effect analyzer logic here if you abstract it)
                start_val, end_val = window_data[temp_col].iloc[0], window_data[temp_col].iloc[-1]
                analysis_result['observed_effects']['summary'] = f"The strategy was successful. The heating system mitigated the temperature drop, with the value changing by only {end_val - start_val:.2f}°C over the 6-hour period."
                return analysis_result

            elif avg_abs_hieq_lg > avg_abs_tieq_lg + epsilon:
                # Humidity is the clear priority (the old, incorrect logic)
                analysis_result.update({
                    'primary_reason': "This was a sophisticated 'heating to dehumidify' strategy, a necessary trade-off because it was too cold to use ventilation.",
                    'causal_driver_type': "Causal Necessity (Trade-off)",
                    'supporting_evidence': { '...': '...' } # Keep old evidence text
                })
                # Calculate observed effect for humidity
                h_col = self.node_to_column.get('H')
                start_val, end_val = window_data[h_col].iloc[0], window_data[h_col].iloc[-1]
                analysis_result['observed_effects']['summary'] = f"The strategy worked: relative humidity was reduced by {start_val - end_val:.1f}%."
                return analysis_result

        # --- STRATEGY 3: Defensive Cooling (uQc) ---
        elif control_action_node == 'uQc':
            tieq_lg_col = self.node_to_column.get('T_ieq')
            hieq_lg_col = self.node_to_column.get('H_ieq')
            tout_col = self.node_to_column.get('Tout')
            t_in_col = self.node_to_column.get('T')
            h_in_col = self.node_to_column.get('H')
            qrad_col = self.node_to_column.get('Qrad')
            cool_ref_col = self.node_to_column.get('uQc')  # Get the actual cooling column
            primary_goal_identified = True
            
            # *** CRITICAL CHECK: Verify cooling was actually active ***
            avg_cool_ref = window_data[cool_ref_col].abs().mean()
            max_cool_ref = window_data[cool_ref_col].abs().max()
            cooling_active_threshold = 1e-6  # Realistic threshold for active cooling
            
            print(f"DEBUG (uQc Range Analysis): avg_cool_ref={avg_cool_ref:.2E}, max_cool_ref={max_cool_ref:.2E}, threshold={cooling_active_threshold:.2E}")
            
            if avg_cool_ref < cooling_active_threshold and max_cool_ref < cooling_active_threshold:
                # Cooling was NOT actually active
                analysis_result.update({
                    'primary_reason': "Active cooling was not used during this period.",
                    'causal_driver_type': "No Action Taken (Zero Cooling)",
                    'supporting_evidence': {
                        'Actual Cooling': f"The cooling system was essentially inactive (avg: {avg_cool_ref:.2E}, max: {max_cool_ref:.2E}).",
                        'Alternative Strategy': "The controller may have used ventilation or passive cooling strategies instead."
                    }
                })
                # Check if ventilation was used instead
                vent_ref_col = self.node_to_column.get('uV')
                if vent_ref_col in window_data.columns:
                    avg_vent = window_data[vent_ref_col].mean()
                    analysis_result['observed_effects']['summary'] = f"Instead of active cooling, the controller likely used ventilation (avg: {avg_vent:.2E}) as a more economical cooling method."
                else:
                    analysis_result['observed_effects']['summary'] = "No significant cooling activity was detected. The controller used alternative passive strategies."
                return analysis_result
            
            # 1. Calculate the average "effort" for each potential problem.
            avg_abs_tieq_lg = window_data[tieq_lg_col].abs().mean()
            avg_abs_hieq_lg = window_data[hieq_lg_col].abs().mean()
            
            print(f"DEBUG (Aggregate uQc V3): Avg Temp Effort: {avg_abs_tieq_lg:.2E}, Avg Hum Effort: {avg_abs_hieq_lg:.2E}")

            # 2. Compare the efforts to find the true priority.
            if avg_abs_hieq_lg > avg_abs_tieq_lg * 1.5: # Humidity effort must be significantly greater
                # This is the "Dehumidification Priority" scenario (your old answer).
                analysis_result.update({
                    'primary_reason': "This was a sustained strategy to manage high humidity and mitigate disease risk.",
                    'causal_driver_type': "Proactive Health Management",
                    'supporting_evidence': {
                        'Optimizer\'s Objective': f"The Humidity Lagrangian was consistently positive (avg abs value: {avg_abs_hieq_lg:.2E}), proving the controller's primary goal was to reduce humidity. This was a stronger signal than the temperature constraint (avg abs value: {avg_abs_tieq_lg:.2E})."
                    }
                })
                # Calculate observed effect for humidity
                start_val, end_val = window_data[h_in_col].iloc[0], window_data[h_in_col].iloc[-1]
                change = end_val - start_val
                analysis_result['observed_effects']['summary'] = f"The strategy was successful. Over the period, relative humidity was reduced by {abs(change):.1f}% (from {start_val:.1f}% to {end_val:.1f}%)."
                return analysis_result
            else:
                # This is the "Temperature Priority" scenario (the system's old answer).
                analysis_result.update({
                    'primary_reason': "This was a sustained defensive cooling strategy to protect the crop from overheating.",
                    'causal_driver_type': "Defensive Action (Constraint Avoidance)",
                    'supporting_evidence': {
                        'Environmental Pressure': f"This action was necessary to combat a heavy and continuous heat load, primarily from intense solar radiation (average: {window_data[qrad_col].mean():.2f} W/m²)."
                    }
                })
                # Add the causal necessity as supporting evidence
                avg_tout = window_data[tout_col].mean()
                avg_t_in = window_data[t_in_col].mean()
                if avg_tout > avg_t_in:
                    analysis_result['supporting_evidence']['Alternative Action Avoided'] = f"Crucially, active cooling was necessary because ventilation was not a viable option, as the outside air (average: {avg_tout:.2f}°C) was warmer than the inside air (average: {avg_t_in:.2f}°C)."
                
                # Calculate observed effect for temperature
                max_temp = window_data[t_in_col].max()
                max_ref = window_data[self.node_to_column.get('T_ref')].max()
                analysis_result['observed_effects']['summary'] = f"The strategy was effective. The cooling system successfully capped the internal temperature at a peak of {max_temp:.2f}°C, preventing it from significantly exceeding the maximum setpoint of {max_ref:.2f}°C."
                return analysis_result
                
        # --- STRATEGY 4: Strategic Ventilation (uV) ---
        elif control_action_node == 'uV':
            hieq_lg_col = self.node_to_column.get('H_ieq')
            tieq_lg_col = self.node_to_column.get('T_ieq')
            ceq_lg_col = self.node_to_column.get('C_eq') 
            c_in_col = self.node_to_column.get('C')
            c_out_col = self.node_to_column.get('Cout')
            tout_col = self.node_to_column.get('Tout')
            h_cov_col = self.node_to_column.get('H_cov') # Condensation flux
            hout_col = self.node_to_column.get('Hout')
            vent_ref_col = self.node_to_column.get('uV')  # Get the actual ventilation column
            primary_goal_identified = True
            
            # *** CRITICAL CHECK: Verify ventilation was actually active ***
            avg_vent_ref = window_data[vent_ref_col].abs().mean()
            max_vent_ref = window_data[vent_ref_col].abs().max()
            vent_active_threshold = 1e-6  # Realistic threshold for active ventilation
            
            print(f"DEBUG (uV Range Analysis): avg_vent_ref={avg_vent_ref:.2E}, max_vent_ref={max_vent_ref:.2E}, threshold={vent_active_threshold:.2E}")
            
            if avg_vent_ref < vent_active_threshold and max_vent_ref < vent_active_threshold:
                # Ventilation was NOT actually active
                analysis_result.update({
                    'primary_reason': "Ventilation was not significantly activated during this period.",
                    'causal_driver_type': "No Action Taken (Minimal Ventilation)",
                    'supporting_evidence': {
                        'Actual Ventilation': f"The ventilation system was essentially closed (avg: {avg_vent_ref:.2E}, max: {max_vent_ref:.2E}).",
                        'Possible Reasons': "This could be due to unfavorable outside conditions (too hot, too humid, or too cold), or no need for ventilation."
                    }
                })
                analysis_result['observed_effects']['summary'] = "No significant ventilation activity was detected. The greenhouse remained mostly closed during this period."
                return analysis_result

            avg_hieq_lg = window_data[hieq_lg_col].mean()
            avg_tieq_lg = window_data[tieq_lg_col].mean()
            avg_ceq_lg = window_data[ceq_lg_col].mean()
            
            print(f"DEBUG (uV Lagrangian Values): avg_hieq_lg={avg_hieq_lg:.2E}, avg_tieq_lg={avg_tieq_lg:.2E}, avg_ceq_lg={avg_ceq_lg:.2E}")
            
            # *** HIGHEST PRIORITY: CO2 PURGE (check this FIRST before humidity/temperature) ***
            if pd.notna(avg_ceq_lg) and abs(avg_ceq_lg) > 0.05:
                primary_goal_identified = True
                avg_c_in = window_data[c_in_col].mean()
                avg_c_out = window_data[c_out_col].mean()
                if avg_c_out < avg_c_in:
                    analysis_result.update({
                        'primary_reason': "The ventilation bursts were a targeted action to rapidly purge high levels of CO₂ remaining from daytime and align with the lower nighttime setpoint.",
                        'causal_driver_type': "Setpoint Tracking (State Purge)",
                        'supporting_evidence': {
                            'Optimizer\'s Objective': f"The CO₂ Equality Lagrangian was highly active (avg: {avg_ceq_lg:.2E}), proving the controller was working to reduce the indoor CO₂ level.",
                            'Favorable Conditions': f"Ventilation was used because the outside air had a much lower CO₂ concentration (avg: {avg_c_out:.1f} ppm) than the inside air (avg: {avg_c_in:.1f} ppm)."
                        }
                    })
                    start_val, end_val = window_data[c_in_col].iloc[0], window_data[c_in_col].iloc[-1]
                    analysis_result['observed_effects']['summary'] = f"The strategy was successful, reducing the indoor CO₂ concentration by {start_val - end_val:.1f} ppm (from {start_val:.1f} to {end_val:.1f})."
                    return analysis_result
            
            # PRIORITY 2: Dehumidification
            if all(col and col in window_data.columns for col in [hieq_lg_col, tout_col, h_cov_col, hout_col]):
                # 1. Was the primary goal to dehumidify?
                avg_hieq_lg = window_data[hieq_lg_col].mean()
                is_goal_dehumidify = avg_hieq_lg > self.config['lagrangian_active_threshold']['H_ieq']

                # 2. Was the passive alternative (condensation) weak? This is the key "necessity" check.
                # A warm night means low temperature difference, hence weak condensation.
                avg_tout = window_data[tout_col].mean()
                # Define a threshold for a "warm night" where condensation is ineffective
                warm_night_threshold = 15.0 # Example: if outside temp is above 15°C
                is_condensation_weak = avg_tout > warm_night_threshold

                # 3. Was ventilation a viable tool? (Dry outside air)
                avg_hout = window_data[hout_col].mean()
                avg_h_in = window_data[self.node_to_column.get('H')].mean()
                is_ventilation_viable = avg_hout < avg_h_in

                print(f"DEBUG (Causal Necessity Check): Goal Dehumidify? {is_goal_dehumidify}, Condensation Weak? {is_condensation_weak}, Vent Viable? {is_ventilation_viable}")

                if is_goal_dehumidify and is_condensation_weak and is_ventilation_viable:
                    primary_goal_identified = True
                    analysis_result.update({
                        'primary_reason': "This was a necessary and more aggressive health management strategy driven by the warm nighttime temperatures.",
                        'causal_driver_type': "Causal Necessity (Alternative Unavailable)",
                        'supporting_evidence': {
                            'The Critical Goal': f"The controller's primary objective was to lower humidity for crop health, proven by the consistently positive Humidity Lagrangian (avg: {avg_hieq_lg:.2E}).",
                            'The Physical Necessity': f"On a cold night, the system uses 'free' passive condensation. However, because it was a warm night (avg Tout: {avg_tout:.1f}°C), this natural process was too slow and weak to be effective.",
                            'The Chosen Strategy': "Therefore, the controller correctly chose a more powerful tool: active ventilation, using the dry outside air (avg Hout: {avg_hout:.1f}%) to forcefully purge the humid indoor air and guarantee crop safety."
                        }
                    })
                    
                    # You can add the observed effect calculation here as well
                    h_in_col = self.node_to_column.get('H')
                    start_val, end_val = window_data[h_in_col].iloc[0], window_data[h_in_col].iloc[-1]
                    change = end_val - start_val
                    analysis_result['observed_effects']['summary'] = f"The strategy was highly effective, successfully forcing the relative humidity down by {abs(change):.1f}% (from {start_val:.1f}% to {end_val:.1f}%)."
                    
                    return analysis_result

            # PRIORITY 3: Dehumidification (Health) - lower priority than CO2 purge
            elif avg_hieq_lg > 1e-9:
                analysis_result.update({
                    'primary_reason': "This was a sustained strategy to manage high humidity levels and reduce disease risk.",
                    'causal_driver_type': "Proactive Health Management"
                })
                h_in_col, h_out_col = self.node_to_column.get('H'), self.node_to_column.get('Hout')
                avg_h_in, avg_h_out = window_data[h_in_col].mean(), window_data[h_out_col].mean()
                analysis_result['supporting_evidence'] = {
                    'Optimizer\'s Objective': f"The Humidity Lagrangian was consistently positive (avg: {avg_hieq_lg:.2E}), proving the goal was to reduce humidity.",
                    'Favorable Conditions': f"Ventilation was effective because the outside air (avg: {avg_h_out:.1f}%) was consistently drier than the inside air (avg: {avg_h_in:.1f}%)."
                }
                start_val, end_val = window_data[h_in_col].iloc[0], window_data[h_in_col].iloc[-1]
                analysis_result['observed_effects']['summary'] = f"The ventilation successfully managed humidity, changing it by {end_val - start_val:.1f}% (from {start_val:.1f}% to {end_val:.1f}%)."
                return analysis_result

            # PRIORITY 3: Low-Cost Cooling (Economic)
            elif avg_tieq_lg > 1e-9:
                analysis_result.update({
                    'primary_reason': "The controller used ventilation as a sustained, energy-efficient strategy to cool the greenhouse.",
                    'causal_driver_type': "Economic Cooling"
                })
                t_in_col, t_out_col = self.node_to_column.get('T'), self.node_to_column.get('Tout')
                avg_t_in, avg_t_out = window_data[t_in_col].mean(), window_data[t_out_col].mean()
                analysis_result['supporting_evidence'] = {
                    'Optimizer\'s Objective': f"The Temperature Lagrangian was consistently positive (avg: {avg_tieq_lg:.2E}), indicating a continuous need for cooling.",
                    'Economic Choice': f"Ventilation was used because the outside air (avg: {avg_t_out:.2f}°C) was cooler than inside (avg: {avg_t_in:.2f}°C), making it the most cost-effective method."
                }
                start_val, end_val = window_data[t_in_col].iloc[0], window_data[t_in_col].iloc[-1]
                analysis_result['observed_effects']['summary'] = f"The strategy successfully slowed the rate of temperature increase, with the final temperature changing by {end_val - start_val:.2f}°C over the period."
                return analysis_result

            # --- 3. ANALYZE SIDE-EFFECT MANAGEMENT ---
        # The user explicitly asked about the impact on temperature.
        if primary_goal_identified and other_nodes:
            side_effect_summary = ""
            for node in other_nodes:
                # Check if this node is a state variable and not the primary one we already analyzed
                if node in self.base_state_variables and node != 'C': # Example for the CO2 purge case
                    col = self.node_to_column.get(node)
                    if col and col in window_data.columns:
                        change = window_data[col].iloc[-1] - window_data[col].iloc[0]
                        node_name = self.node_to_name.get(node, node)
                        side_effect_summary += (
                            f"The impact on {node_name} was minimal, with a total change of only {change:+.2f} units. "
                        )
            
            if side_effect_summary:
                analysis_result['supporting_evidence']['Side-Effect Management'] = side_effect_summary.strip()
        return analysis_result

    def _generate_mpc_reasoning_summary(self, mpc_reasoning: Dict) -> str:
        """Generate a human-readable summary of MPC reasoning."""
        summary_parts = []
        
        if mpc_reasoning['predicted_violations']:
            violations_count = len(mpc_reasoning['predicted_violations'])
            earliest_violation = min(
                v['earliest_violation_time'] for v in mpc_reasoning['predicted_violations']
            )
            hours_until_violation = (earliest_violation - mpc_reasoning['action_timestamp']).total_seconds() / 3600
            
            summary_parts.append(
                f"MPC predicted {violations_count} constraint violation(s) "
                f"would occur in {hours_until_violation:.1f} hours without intervention"
            )
        
        if mpc_reasoning['disturbance_impacts']:
            disturbance_count = len(mpc_reasoning['disturbance_impacts'])
            summary_parts.append(
                f"Anticipated impacts from {disturbance_count} disturbance trend(s)"
            )
        
        if mpc_reasoning['optimization_factors']:
            tracking_conflicts = len(mpc_reasoning['optimization_factors'])
            summary_parts.append(
                f"Balanced reference tracking with {tracking_conflicts} constraint consideration(s)"
            )
        
        if not summary_parts:
            return "Could not identify clear predictive reasoning for this MPC action"
        
        return "MPC took preventive action because: " + "; ".join(summary_parts)


    def analyze_control_action_event(self, control_action_node: str, timestamp: datetime, original_query: str, ablation_mode: AblationMode = AblationMode.FULL_HCA) -> Dict[str, Any]:
        """
        (V2 - EVIDENCE SCORING ENGINE)
        Explain why a control action was taken by polling expert modules and
        keeping the hypothesis with the highest 'confidence' score.

        Which experts run depends on ``ablation_mode``:
          * always: morning kickstart, free CO2 maneuver, simultaneous driver
            reaction, sensor fault check;
          * FULL_HCA: every expert;
          * KKT_ONLY: additionally the optimizer-goal, causal-necessity,
            opportunistic and proactive-health experts;
          * PHYSICS_ONLY: additionally the setpoint-change expert;
          * CAUSAL_ONLY: additionally the preceding-causal-event expert.

        Experts are polled in a fixed order. On equal confidence the earlier
        expert wins, because ``max`` returns the first maximal element.

        Args:
            control_action_node: Identifier of the control action node.
            timestamp: Moment of interest; snapped to the nearest entry of
                ``self.data``'s index.
            original_query: The user's query, forwarded to the experts that
                accept it.
            ablation_mode: Selects the set of experts to consult.

        Returns:
            A dict with 'control_action_node', 'action_name', 'timestamp',
            'primary_reason', 'causal_driver_type', 'supporting_evidence',
            'supporting_reasons' and 'observed_effects'. If the timestamp
            cannot be resolved against ``self.data``, a dict holding only an
            'error' message is returned instead.
        """
        print(f"DEBUG - Evidence Scoring Engine: Analyzing '{control_action_node}' at {timestamp} with mode {ablation_mode.name}.")

        try:
            nearest_pos = self.data.index.get_indexer([timestamp], method='nearest')[0]
            action_ts_actual = self.data.iloc[nearest_pos].name
        except Exception as e:
            return {'error': f"Could not find data at or near timestamp {timestamp}: {e}"}

        node, ts = control_action_node, action_ts_actual

        # Which expert families are enabled for the requested ablation mode.
        full_hca = ablation_mode == AblationMode.FULL_HCA
        kkt_enabled = ablation_mode in (AblationMode.FULL_HCA, AblationMode.KKT_ONLY)
        physics_enabled = ablation_mode in (AblationMode.FULL_HCA, AblationMode.PHYSICS_ONLY)
        causal_enabled = ablation_mode in (AblationMode.FULL_HCA, AblationMode.CAUSAL_ONLY)

        def system_event_hypothesis():
            # Expert 2: System-Wide Event. Its output uses different keys, so
            # it is reshaped into the standard hypothesis structure.
            event = self.analyze_system_event(ts, [])  # Assuming target_nodes isn't needed here
            if event and 'confidence' in event and event['confidence'] > 0:
                return {
                    'confidence': event['confidence'],
                    'primary_reason': event['explanation'],
                    'causal_driver_type': event['event_type'],
                    'supporting_evidence': event['evidence']
                }
            return None

        # --- Step 1: Gather Hypotheses from All Relevant Expert Modules based on Ablation Mode ---
        # Each entry: (enabled, must_have_confidence_key, expert callable).
        # The order is significant: it decides ties between equal scores.
        experts = [
            (full_hca, False, lambda: self._check_for_inaction_due_to_conflict(node, ts, original_query)),
            (full_hca, False, lambda: self._check_for_defensive_override(node, ts)),
            (full_hca, False, lambda: self._check_for_sunrise_kickstart_strategy(ts)),
            (True, False, lambda: self._check_for_morning_kickstart_v2(ts)),
            (full_hca, False, lambda: self._check_for_flawed_strategy_v2(ts)),
            # Expert 0b: Free CO2 Maneuver
            (True, False, lambda: self._check_for_free_co2_maneuver_v2(ts)),
            (True, False, lambda: self._check_for_simultaneous_driver_reaction(node, ts)),
            # Expert 1: Fault Diagnosis (Always runs, it's a safety check)
            (True, True, lambda: self._check_for_sensor_fault_event(ts, node)),
            # Expert 2: System-Wide Event
            (full_hca, False, system_event_hypothesis),
            # Expert 3: Optimizer's Direct Goal (KKT / Lagrangian analysis)
            (kkt_enabled, True, lambda: self._check_for_active_STATE_constraint_driver(node, ts)),
            # Expert 4: Direct Command (Physics / Setpoint analysis)
            (physics_enabled, True, lambda: self._find_causal_setpoint_change(node, ts, original_query)),
            # Expert 5: Causal Necessity (KKT / Trade-off analysis)
            (kkt_enabled, True, lambda: self._check_for_causal_necessity_driver(node, ts)),
            # Expert 6: Opportunistic Growth (KKT / Economic analysis)
            (kkt_enabled, True, lambda: self._check_for_opportunistic_driver(node, ts)),
            # Expert 7: Afternoon Strategy Shift (HCA - High-level strategy)
            (full_hca, True, lambda: self._check_for_afternoon_strategy_shift(ts)),
            # Expert 8: Proactive Health Management (HCA / KKT)
            (kkt_enabled, True, lambda: self._check_for_proactive_health_driver(node, ts)),
            # Expert 9: Sunrise Opportunity (HCA - physics-based event)
            (full_hca, True, lambda: self._check_for_sunrise_opportunity_driver(node, ts)),
            (full_hca, False, lambda: self._check_for_causal_opportunity_driver(node, ts)),
            # Expert 10: Historical Causality (PCMCI analysis)
            (causal_enabled, True, lambda: self._find_preceding_causal_event(node, ts)),
        ]

        hypotheses = []
        for enabled, needs_confidence, run_expert in experts:
            if not enabled:
                continue
            hypothesis = run_expert()
            if not hypothesis:
                continue
            if needs_confidence and 'confidence' not in hypothesis:
                continue
            hypotheses.append(hypothesis)

        # --- Step 2: Select the Best Hypothesis ---
        if not hypotheses:
            # Fallback if no expert found a pattern
            best_hypothesis = {
                'primary_reason': "The specific trigger for this action could not be definitively determined from the available patterns for the current analysis mode.",
                'causal_driver_type': "General System Response",
                'supporting_evidence': {},
                'confidence': 0.0  # Assign a zero confidence score
            }
        else:
            # Several experts are accepted without a 'confidence' check, so a
            # missing score is ranked as 0 (same default as the scoreboard
            # below) instead of raising KeyError and aborting the analysis.
            best_hypothesis = max(hypotheses, key=lambda h: h.get('confidence', 0))

        print("\n==================== EVIDENCE SCOREBOARD ====================")
        for h in sorted(hypotheses, key=lambda x: x.get('confidence', 0), reverse=True):
            print(f"  - Score: {h.get('confidence', 0):.1f} | Type: {h.get('causal_driver_type', 'N/A')} | Reason: {h.get('primary_reason', 'N/A')}")
        print(f"\nWINNER: {best_hypothesis.get('causal_driver_type', 'N/A')} with score {best_hypothesis.get('confidence', 0):.1f}")
        print("===========================================================\n")

        # --- Step 3: Assemble the Final Analysis Dictionary ---
        # The winning hypothesis already contains the core explanation.
        analysis_result = {
            'control_action_node': control_action_node,
            'action_name': self.node_to_name.get(control_action_node, control_action_node),
            'timestamp': action_ts_actual,
            'primary_reason': best_hypothesis.get('primary_reason'),
            'causal_driver_type': best_hypothesis.get('causal_driver_type'),
            'supporting_evidence': best_hypothesis.get('supporting_evidence', {})
        }

        # This provides richness without being the primary cause.
        analysis_result['supporting_reasons'] = self._get_deeper_context_for_action(control_action_node, action_ts_actual)
        control_saturation_reason = self._check_for_active_CONTROL_constraint_driver(control_action_node, action_ts_actual)
        if control_saturation_reason:
            analysis_result['supporting_reasons']['control_saturation'] = control_saturation_reason['primary_reason']

        # Add observed effects
        analysis_result['observed_effects'] = self._analyze_observed_effects(control_action_node, action_ts_actual)

        # Final Output (default=str so timestamps and other non-JSON values print)
        print("\n==================== FINAL ANALYSIS RESULT (before formatting) ====================")
        print(json.dumps(analysis_result, indent=2, default=str))
        print("==================================================================================")

        return analysis_result


    def _check_for_midnight_reset_anomaly(self, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (FINAL V5 - STATE-BASED EXPERT)
        Looks for the specific signature of a data artifact/sensor reset that
        often occurs at the beginning of a new day (around midnight). This version
        checks the state of the system, not the change, which is more robust
        for initialization artifacts.
        """
        # This expert is only relevant for events at the very start of the day.
        if not (timestamp.hour == 0 and timestamp.minute <= 10):
            return None

        print("DEBUG (Midnight Artifact Expert V6): Checking for reset signature...")

        try:
            # Define a small window STARTING from the user's time
            window_start = self.find_nearest_timestamp(timestamp)
            window_end = window_start + timedelta(minutes=15) # Look 15 mins ahead
            window_data = self.data.loc[window_start:window_end]

            if len(window_data) < 2: return None
        except Exception as e:
            print(f"DEBUG (Midnight Artifact Expert V6): Could not get data window. Error: {e}")
            return None

        # --- THE ROBUST SIGNATURE ---
        hum_ref_col = self.node_to_column.get('H_ref')
        if not hum_ref_col in window_data.columns: return None

        # 1. Find the lowest point and the highest point within this small window
        min_val_in_window = window_data[hum_ref_col].min()
        max_val_in_window = window_data[hum_ref_col].max()
        
        # Get the timestamps of these extreme points
        ts_of_min = window_data[hum_ref_col].idxmin()
        ts_of_max = window_data[hum_ref_col].idxmax()

        # Evidence 1: Was there an impossibly low value followed by a large jump?
        e1_impossible_state = min_val_in_window < 20.0
        e2_large_jump = (max_val_in_window - min_val_in_window) > 50.0
        # Ensure the jump happened in the correct order (min before max)
        e3_correct_order = ts_of_min < ts_of_max

        # 2. Get the evidence from the moment the JUMP COMPLETED (at the max value)
        data_at_jump_end = self.data.loc[ts_of_max]
        heq_lg_col = self.node_to_column.get('H_eq')
        vent_ref_col = self.node_to_column.get('uV')
        
        heq_lg_val = data_at_jump_end.get(heq_lg_col)
        vent_val = data_at_jump_end.get(vent_ref_col)

        e4_optimizer_shock = pd.notna(heq_lg_val) and abs(heq_lg_val) > 10.0
        
        print(f"DEBUG (Midnight Artifact V6): Impossible Low State? {e1_impossible_state} (min_val:{min_val_in_window:.1f}), Large Jump? {e2_large_jump}, Correct Order? {e3_correct_order}, Shock? {e4_optimizer_shock} (val:{heq_lg_val:.2E})")

        # The definitive signature is an impossible low, a large jump, and the optimizer's shock.
        if e1_impossible_state and e2_large_jump and e3_correct_order and e4_optimizer_shock:
            print("SUCCESS (Midnight Artifact Expert V6): Found definitive signature.")
            return {
                'confidence': 10.0,
                'fault_type': "System Initialization Anomaly / Data Artifact",
                'explanation': "You are correct, this was not a real climate event but a data artifact caused by a system state reset at the start of the new day.",
                'evidence': {
                    "Root Cause (Data Artifact)": f"The humidity reference (`relhum_ref`) registered a physically implausible starting value of {min_val_in_window:.1f}% at {ts_of_min.strftime('%H:%M')}, before instantly jumping to {max_val_in_window:.1f}%. This confirms it is not a real measurement.",
                    "Optimizer's 'Shock' (Proof)": f"The system's internal model registered this as a massive error, proven by the huge spike in the Humidity Equality Lagrangian (`Heq_lg`) to {heq_lg_val:.2E} at the moment the jump completed.",
                    "System's Reflexive Action": f"The controller's immediate response was to trigger a strong ventilation burst (`Vent_ref` spiked to {vent_val:.2f}) as a predictable action to try and re-stabilize the system after receiving nonsensical data."
                }
            }
        
        print("DEBUG (Midnight Artifact Expert V6): Signature not found.")
        return None

    def format_fault_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Format a system-fault analysis as a text report for the LLM.

        Decimal numbers embedded in the evidence text are normalised for
        readability: very large (> 1000) or very small (< 0.01) magnitudes are
        written in scientific notation, everything else with two decimals.
        Only complete, stand-alone numeric tokens that carry a decimal point
        or an exponent are touched. Plain integers, clock times such as
        ``00:05`` and digits embedded in identifiers are left exactly as
        written, and values already in scientific notation are kept intact.

        Args:
            analysis_result: Either ``{'error': ...}`` or a diagnosis dict
                with ``fault_type``, ``explanation`` and an optional
                ``evidence`` mapping of label -> text.

        Returns:
            The report as a newline-joined string. Evidence entries with a
            known label are listed first in story order; any other entries
            follow in their original order so no evidence is dropped.
        """
        if 'error' in analysis_result:
            return f"Error during fault analysis: {analysis_result['error']}"

        def format_value(value: float) -> str:
            """Scientific notation for extreme magnitudes, two decimals otherwise."""
            magnitude = abs(value)
            if magnitude > 1000 or 0 < magnitude < 0.01:
                return f"{value:.2E}"
            return f"{value:.2f}"

        # A whole numeric token with a decimal point and/or exponent that is not
        # glued to a word character, a colon (clock time) or another dotted part.
        number_pattern = re.compile(
            r"(?<![\w.:])[-+]?"
            r"(?:(?:\d+\.\d+|\.\d+)(?:[eE][-+]?\d+)?|\d+[eE][-+]?\d+)"
            r"(?![\w:]|\.\d)"
        )

        def reformat(match: "re.Match[str]") -> str:
            token = match.group(0)
            try:
                return format_value(float(token))
            except (ValueError, OverflowError):
                return token  # Not a usable float; keep the original text.

        context = [
            f"--- Diagnostic Report: {analysis_result['fault_type']} ---",
            f"\n**Diagnosis:** {analysis_result['explanation']}",
        ]

        evidence = analysis_result.get('evidence', {})
        if evidence:
            context.append("\n**Causal Chain of Evidence:**")

            # The order of evidence is important for telling a clear story;
            # labels not listed here are appended afterwards instead of dropped.
            preferred_order = ["Root Cause (Sensor Data)", "Optimizer Reaction (System Shock)", "System Action (Defensive Purge)"]
            ordered_keys = [key for key in preferred_order if key in evidence]
            ordered_keys.extend(key for key in evidence if key not in preferred_order)

            for key in ordered_keys:
                # Substituting by match position (not by substring) guarantees
                # that each number is rewritten exactly once, in place.
                formatted_text = number_pattern.sub(reformat, str(evidence[key]))
                context.append(f"- **{key}:** {formatted_text}")

        return "\n".join(context)


    def get_effect_direction(self, control_node: str, state_node: str) -> Literal['increase', 'decrease', 'bidirectional', 'unknown']:
        """
        Report the net direction in which a control action moves a state variable.

        The knowledge graph is searched for the shortest directed path from
        ``control_node`` to ``state_node`` and the signs of the edges along
        it are multiplied together. For example,
        ``get_effect_direction('uQh', 'H')`` may follow
        uQh -> Q_heat (+) -> T (+) -> H (-) and yield 'decrease'.

        Returns:
            'increase' or 'decrease' for a consistent net sign,
            'bidirectional' when the path contains a '∩' or '+/-' edge
            (except for ventilation acting on temperature, which is treated
            as cooling), and 'unknown' when either node is missing or no
            path exists.
        """
        graph = self.G
        if not (graph.has_node(control_node) and graph.has_node(state_node)):
            return 'unknown'

        try:
            # Only forward paths from the control to the state are of interest.
            hops = nx.shortest_path(graph, source=control_node, target=state_node)
        except nx.NetworkXNoPath:
            return 'unknown'

        # '+' keeps the running sign, '-' flips it; the product is the net effect.
        sign = 1
        for src, dst in zip(hops, hops[1:]):
            relationship = graph.get_edge_data(src, dst).get('relationship')

            if relationship == '-':
                sign = -sign
            elif relationship in ('∩', '+/-'):
                # Complex/bidirectional edge: only ventilation (uV) acting on
                # temperature (T) has an assumed operational direction (cooling).
                if not (control_node == 'uV' and state_node == 'T'):
                    return 'bidirectional'
                sign = -sign
            # '+' and any other label leave the sign unchanged.

        return {1: 'increase', -1: 'decrease'}.get(sign, 'unknown')

    def _check_for_causal_necessity_driver(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (COMPREHENSIVE HIERARCHY MODULE)
        Identifies situations of "causal necessity" where a control action was taken because
        cheaper, more obvious, or less conflicting alternatives were not viable. This captures
        complex trade-offs made by the MPC.
        """
        print(f"DEBUG (Causal Necessity Check): Checking for forced actions or trade-offs for '{control_action_node}'...")
        try:
            # Ensure the index is datetime type
            if not isinstance(self.data.index, pd.DatetimeIndex):
                print(f"WARNING: DataFrame index is not DatetimeIndex, it's {type(self.data.index)}")
                return None
            
            nearest_timestamp = self.find_nearest_timestamp(timestamp)
            if nearest_timestamp is None:
                return None
            data_row = self.data.loc[nearest_timestamp]
        except (IndexError, Exception) as e:
            print(f"Could not get data for timestamp {timestamp}: {e}")
            return None # Cannot get data for this timestamp

        # --- Get all relevant state, disturbance, and control values at once ---
        # States
        t_in = data_row.get(self.node_to_column.get('T'))
        h_in = data_row.get(self.node_to_column.get('H'))
        c_in = data_row.get(self.node_to_column.get('C'))
        # Disturbances
        t_out = data_row.get(self.node_to_column.get('Tout'))
        h_out = data_row.get(self.node_to_column.get('Hout'))
        q_rad = data_row.get(self.node_to_column.get('Qrad'))
        # Control Actions
        thresholds = self.config.get('control_active_threshold', {})
        uqc_threshold = thresholds.get('uQc', thresholds.get('default', 1E-6))
        uqh_threshold = thresholds.get('uQh', thresholds.get('default', 1E-6))
        uv_threshold = thresholds.get('uV', thresholds.get('default', 1E-6))
        uc_threshold = thresholds.get('uC', thresholds.get('default', 1E-6))
        is_uqc_active = data_row.get(self.node_to_column.get('uQc'), 0) > uqc_threshold
        is_uqh_active = data_row.get(self.node_to_column.get('uQh'), 0) > uqh_threshold
        is_uv_active = data_row.get(self.node_to_column.get('uV'), 0) > uv_threshold
        is_uc_active = data_row.get(self.node_to_column.get('uC'), 0) > uc_threshold

        # Get optimal ranges for checking side-effects
        t_optimal = self.constraint_limits.get('T', {}).get('optimal_range', [18, 26])
        h_optimal = self.constraint_limits.get('H', {}).get('optimal_range', [70, 85])
        c_optimal = self.constraint_limits.get('C', {}).get('optimal_range', [600, 1000])


        # ==============================================================================
        # --- SCENARIO 1: Analysis for Active Cooling (uQc) ---
        # Why was the EXPENSIVE cooling used instead of FREE ventilation?
        # ==============================================================================
        if control_action_node == 'uQc' and is_uqc_active and not is_uv_active:
            # Check 1: Would ventilation have made the TEMPERATURE worse?
            if pd.notna(t_in) and pd.notna(t_out) and t_out >= t_in:
                temp_difference = t_out - t_in
                confidence_score = 6.0 + min(2.0, temp_difference) # Base score of 7, bonus up to 2 points. Max 9.0
                return {
                    'confidence': confidence_score,
                    'primary_reason': "Energy-intensive cooling was used because the low-cost alternative (ventilation) would have been counter-productive.",
                    'causal_driver_type': "Causal Necessity (No Viable Alternative)",
                    'supporting_evidence': {
                        'Evidence': f"With outside temperatures ({t_out:.1f}°C) being warmer than the inside ({t_in:.1f}°C), opening the vents would have heated the greenhouse further. Therefore, active cooling was the only viable option to reduce temperature."
                    }
                }
            # Check 2: Would ventilation have made the HUMIDITY worse?
            if pd.notna(h_in) and pd.notna(h_out) and h_out >= h_in:
                hum_difference = h_out - h_in
                confidence_score = 7.0 + min(2.0, hum_difference / 10.0) # Base score 7, bonus for very humid outside air.
                return {
                    'confidence': confidence_score,
                    'primary_reason': "Active cooling was used for dehumidification because ventilation was not a viable option.",
                    'causal_driver_type': "Causal Necessity (No Viable Alternative)",
                    'supporting_evidence': {
                        'Evidence': f"With outside humidity ({h_out:.1f}%) being higher than inside ({h_in:.1f}%), opening the vents would have increased humidity. Active cooling was therefore necessary to remove moisture via condensation."
                    }
                }

        # ==============================================================================
        # --- SCENARIO 2: Analysis for Active Heating (uQh) ---
        # Why was heating used, possibly for a non-obvious reason like dehumidification?
        # ==============================================================================
        if control_action_node == 'uQh' and is_uqh_active:
            # Check: Was heating used to fight humidity when it was too cold to ventilate?
            # This is a common advanced strategy: "heating to dehumidify".
            is_humidity_high = pd.notna(h_in) and h_in > h_optimal[1]
            is_temp_low = pd.notna(t_in) and t_in < t_optimal[0]

            if is_humidity_high and is_temp_low and not is_uv_active:
                hum_severity = (h_in - h_optimal[1]) / 5.0 # How many % points over optimal max
                temp_severity = (t_optimal[0] - t_in) / 2.0 # How many °C below optimal min
                confidence_score = 6.0 + min(2.5, hum_severity + temp_severity) # Base 6, max 8.5
                return {
                    'confidence': confidence_score,
                    'primary_reason': "The heating system was activated as a dehumidification strategy, a necessary trade-off due to low temperatures.",
                    'causal_driver_type': "Causal Necessity (Trade-off)",
                    'supporting_evidence': {
                        'Evidence': f"Humidity was high ({h_in:.1f}%), but it was too cold ({t_in:.1f}°C) to use ventilation, which would lower the temperature further. Heating raises the air's capacity to hold moisture (lowering relative humidity) and was the only viable action to manage the humidity risk."
                    }
                }

        # ==============================================================================
        # --- SCENARIO 3: Analysis for CO2 Injection (uC) ---
        # Why was CO2 being WASTEFTULLY injected while vents were open?
        # ==============================================================================
        if control_action_node == 'uC' and is_uc_active and is_uv_active:
            # Check: Was it an aggressive growth strategy despite the waste?
            is_light_high_for_growth = pd.notna(q_rad) and q_rad > self.config['reasoning_thresholds'].get('Qrad_photosynthesis', 200)

            if is_light_high_for_growth:
                light_level = q_rad
                confidence_score = 5.0 + min(3.0, (light_level - 200) / 200)
                return {
                    'confidence': confidence_score,
                    'primary_reason': "CO₂ was injected despite open vents to aggressively capitalize on a high-yield growth opportunity.",
                    'causal_driver_type': "Causal Necessity (Opportunistic Trade-off)",
                    'supporting_evidence': {
                        'Evidence': f"The system detected ideal growth conditions due to high solar radiation ({q_rad:.1f} W/m²). The need for CO₂ to fuel photosynthesis was prioritized over the cost of losing some CO₂ through the open vents. The 'cost of inaction' (lost growth) was deemed greater than the cost of wasted CO₂."
                    }
                }

        # ==============================================================================
        # --- SCENARIO 4: Analysis for Ventilation (uV) ---
        # Why was ventilation used if it had a significant NEGATIVE side-effect?
        # ==============================================================================
        if control_action_node == 'uV' and is_uv_active:
            # Check 1: Was it used for cooling DESPITE harming CO2 levels?
            is_temp_high = pd.notna(t_in) and t_in > t_optimal[1]
            is_co2_level_lowered = pd.notna(c_in) and c_in < c_optimal[0]

            if is_temp_high and is_co2_level_lowered:
                temp_severity = t_in - t_optimal[1]
                confidence_score = 6.0 + min(2.0, temp_severity) # Base 6, max 8.0 for very high temps
                return {
                    'confidence': confidence_score,
                    'primary_reason': "Ventilation was activated to resolve a critical high-temperature issue, accepting a trade-off of lowering CO₂ levels.",
                    'causal_driver_type': "Causal Necessity (Priority Trade-off)",
                    'supporting_evidence': {
                        'Evidence': f"The controller prioritized reducing the high temperature ({t_in:.1f}°C) over maintaining optimal CO₂ concentration. Preventing heat stress on the crop was deemed more critical than the temporary drop in CO₂."
                    }
                }

            # Check 2: Was it used for dehumidification DESPITE harming temperature?
            is_humidity_high = pd.notna(h_in) and h_in > h_optimal[1]
            is_temp_level_lowered = pd.notna(t_in) and t_in < t_optimal[0]

            if is_humidity_high and is_temp_level_lowered:
                hum_severity = h_in - h_optimal[1]
                confidence_score = 6.0 + min(2.0, hum_severity / 5.0) # Base 6, max 8.0 for very high humidity
                return {
                    'confidence': confidence_score,
                    'primary_reason': "Ventilation was activated to resolve a critical high-humidity issue, accepting a trade-off of lowering the temperature.",
                    'causal_driver_type': "Causal Necessity (Priority Trade-off)",
                    'supporting_evidence': {
                        'Evidence': f"The controller prioritized reducing high humidity ({h_in:.1f}%) to prevent disease risk, even though this action caused the temperature ({t_in:.1f}°C) to drop below its optimal range."
                    }
                }

        # If none of the specific "necessity" or "trade-off" scenarios were met, return None.
        print(f"DEBUG (Causal Necessity Check): No specific forced action or trade-off scenario found for '{control_action_node}'.")
        return None
    
    def _check_for_predictive_violation_driver(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any] | None:
        """
        Simulates the NMPC's predictive nature by projecting current trends into the future
        to see if a constraint violation was imminent.
        """
        print("DEBUG (Predictive Check): Looking for predicted future violations...")
        
        # How far ahead to look, in hours
        prediction_horizon_hours = self.config.get('prediction_horizon_hours', 4)
        # How far back to look to establish a trend
        trend_lookback_hours = 1

        try:
            # Get the data row at the time of the action
            nearest_timestamp = self.find_nearest_timestamp(timestamp)
            if nearest_timestamp is None:
                return None
            current_data_row = self.data.loc[nearest_timestamp]
            
            # Get the data slice to calculate the recent trend
            trend_start_time = timestamp - timedelta(hours=trend_lookback_hours)
            trend_slice = self.data.loc[trend_start_time:timestamp]
            
        except Exception as e:
            print(f"WARN (Predictive Check): Could not get data for trend calculation: {e}")
            return None

        states_affected = self.get_states_typically_affected_by_control(control_action_node)
        
        for state_node in states_affected:
            state_col = self.node_to_column.get(state_node)
            if not state_col or state_col not in trend_slice.columns: continue
            
            state_series = trend_slice[state_col].dropna()
            if len(state_series) < 2: continue
                
            # Calculate the recent trend (slope) in units per hour
            time_numeric = (state_series.index - state_series.index[0]).total_seconds()
            slope_per_sec, _ = np.polyfit(time_numeric, state_series, 1)
            slope_per_hour = slope_per_sec * 3600
            
            current_value = state_series.iloc[-1]
            
            # Project the trend into the future
            for hour_ahead in range(1, prediction_horizon_hours + 1):
                projected_value = current_value + (slope_per_hour * hour_ahead)
                
                # Check if this projected value violates any hard limits
                limits = self.constraint_limits.get(state_node, {})
                max_limit = limits.get('max')
                min_limit = limits.get('min')
                
                violation_found = False
                limit_type = ""
                if max_limit is not None and projected_value > max_limit:
                    violation_found = True
                    limit_type = "UPPER"
                elif min_limit is not None and projected_value < min_limit:
                    violation_found = True
                    limit_type = "LOWER"
                
                if violation_found:
                    var_name = self.node_to_name.get(state_node, state_node)
                    print(f"DEBUG (Predictive Check): SUCCESS! Found predicted violation for {var_name}.")
                    return {
                        'primary_reason': f"The controller acted proactively to prevent a predicted future violation of the {var_name}'s {limit_type} limit.",
                        'causal_driver_type': "Predictive Violation Avoidance",
                        'supporting_evidence': {
                            'Forecast': f"Based on the recent trend ({slope_per_hour:+.2f} units/hour), the system projected that {var_name} would exceed its {limit_type} limit within {hour_ahead} hour(s)."
                        }
                    }

        return None

    def _check_for_sunrise_opportunity_driver(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (DEFINITIVE, PATTERN-BASED VERSION 6.0)
        Identifies a pre-emptive CO2 injection by searching the entire morning window for a
        sunrise-followed-by-spike pattern, making it robust to user timing inaccuracies
        and data anomalies.
        """
        from datetime import datetime
        
        if control_action_node != 'uC':
            return None

        print("DEBUG (Sunrise Opportunity Check): Searching for sunrise-injection pattern...")

        try:
            # Define the morning window based on the query timestamp's date.
            morning_start = datetime.combine(timestamp.date(), time(4, 0))
            morning_end = datetime.combine(timestamp.date(), time(8, 0))
            morning_data = self.data.loc[morning_start:morning_end]

            if len(morning_data) < 2: return None

        except (IndexError, KeyError):
            return None

        qrad_col = self.node_to_column.get('Qrad')
        co2_inj_col = self.node_to_column.get('uC')

        if not all(col in self.data.columns for col in [qrad_col, co2_inj_col]):
            return None

        # --- PATTERN SEARCH LOGIC ---

        # Find the exact timestamp of the sunrise within the morning window.
        qrad_series = morning_data[qrad_col]
        # Find where the series goes from <1 to >=1
        sunrise_events = qrad_series[(qrad_series >= 1) & (qrad_series.shift(1) < 1)]
        
        if sunrise_events.empty:
            print("DEBUG (Sunrise Opportunity Check): No sunrise event found in the morning window.")
            return None

        sunrise_timestamp = sunrise_events.index[0]
        qrad_at_sunrise = sunrise_events.iloc[0]
        print(f"DEBUG (Sunrise Opportunity Check): Found sunrise at {sunrise_timestamp.strftime('%H:%M')}.")

        # Now, check if the user's query time is close to this actual sunrise time.
        # This confirms the user is asking about the right event.
        time_difference_minutes = abs((timestamp - sunrise_timestamp).total_seconds()) / 60
        if time_difference_minutes > 60: # Allow a 1-hour tolerance
            print("DEBUG (Sunrise Opportunity Check): User query time is too far from actual sunrise time.")
            return None

        # Finally, search for the injection spike in the window AFTER the actual sunrise.
        window_minutes = self.config['reasoning_thresholds']['sunrise_spike_window_minutes']
        search_end_time = sunrise_timestamp + timedelta(minutes=window_minutes)
        injection_window = self.data.loc[sunrise_timestamp:search_end_time, co2_inj_col]

        if injection_window.empty: return None

        injection_at_sunrise = injection_window.iloc[0]
        max_injection_after = injection_window.iloc[1:].max() if len(injection_window) > 1 else 0

        is_injection_spike = (injection_at_sunrise < 0.01 and max_injection_after > 0.4)

        print(f"DEBUG (Sunrise Opportunity Check): Is Injection Spike after {sunrise_timestamp.strftime('%H:%M')}? {is_injection_spike}")

        if is_injection_spike:
            confidence_score = 8.7
            print("SUCCESS: Identified Sunrise Opportunity Scenario.")
            return {
                'confidence': confidence_score,
                'primary_reason': "The sharp spike in CO₂ injection was a pre-emptive action to maximize the benefit of the sunrise.",
                'causal_driver_type': "Predictive Opportunistic Action",
                'supporting_evidence': {
                    'Trigger Event': f"The controller's forecast identified the moment of sunrise (around {sunrise_timestamp.strftime('%H:%M')}), when solar radiation increased from near-zero to {qrad_at_sunrise:.2f} W/m².",
                    'Strategic Goal': f"It aggressively injected CO₂ shortly after to ensure the plants had the raw materials needed for photosynthesis the instant the first light became useful.",
                    'Economic Rationale': "This proactive strategy prevents wasting any valuable sunlight and maximizes the potential for biomass production for the day."
                }
            }
        
        return None
    

    def find_and_analyze_extreme_event(self, date: 'datetime.date', event_type: str) -> Dict[str, Any]:
        """
        Locate a configured extreme event on a given day and analyse its context.

        The event "recipe" is looked up in ``self.extreme_event_configs`` by
        the lower-cased ``event_type``. It names the metric, the source nodes
        (``columns``), how to combine them (``operation``: ``'sum'`` or
        ``'identity'``), and whether to take the maximum or minimum
        (``find``: ``'idxmax'`` or ``'idxmin'``). Once the event timestamp is
        found, ``analyze_context_at_timestamp`` is run for each configured
        analysis variable (``analysis_variables`` or ``analysis_variable``;
        defaults to ``['T']`` when neither yields a variable).

        Args:
            date: Calendar day to search.
            event_type: Name of the event as phrased by the user.

        Returns:
            On success a dict with ``event_timestamp``, ``conclusion``,
            ``evidence`` and ``detailed_contexts``. On any failure a dict
            containing a single ``error`` message.
        """
        print(f"DEBUG - Searching for extreme event: '{event_type}' on {date}")

        # 1. Look up the "recipe" for this event from our config
        config = self.extreme_event_configs.get(event_type.lower())
        if not config:
            return {'error': f"I don't know how to find the event '{event_type}'. Please try 'maximum heat load', 'coldest moment', etc."}

        try:
            day_data = self.data[self.data.index.date == date]
            if day_data.empty:
                return {'error': "No data for the specified date."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        # 2. Prepare for calculation based on the recipe
        metric_name = config['metric_name']
        column_nodes = config['columns']
        operation = config['operation']
        find_method = config['find']
        unit = config.get('unit', '')

        # Support both single and multiple analysis variables. Missing entries
        # are filtered out so the default fallback is actually reachable.
        analysis_variables = config.get('analysis_variables') or [config.get('analysis_variable')]
        if isinstance(analysis_variables, str):
            analysis_variables = [analysis_variables]
        analysis_variables = [var for var in analysis_variables if var is not None]
        if not analysis_variables:
            analysis_variables = ['T']  # Default fallback

        # Convert node names to actual data column names
        data_cols = [self.node_to_column.get(node) for node in column_nodes]
        if not data_cols or any(col is None or col not in day_data.columns for col in data_cols):
            return {'error': "One or more data columns for this event analysis are missing."}

        if find_method not in ('idxmax', 'idxmin'):
            return {'error': "Invalid 'find' method in event configuration."}

        # 3. Perform the calculation
        if operation == 'sum':
            metric_series = day_data[data_cols].sum(axis=1)
        elif operation == 'identity':
            metric_series = day_data[data_cols[0]]
        else:
            # Previously an unknown operation surfaced as a KeyError further down.
            return {'error': f"Invalid 'operation' '{operation}' in event configuration."}

        # 4. Find the extreme point. NaNs are dropped first so that an
        #    all-missing metric is reported instead of breaking idxmax/idxmin.
        metric_series = metric_series.rename(metric_name).dropna()
        if metric_series.empty:
            return {'error': "Could not identify the moment of the event."}

        if find_method == 'idxmax':
            event_timestamp = metric_series.idxmax()
            extreme_value = metric_series.max()
        else:
            event_timestamp = metric_series.idxmin()
            extreme_value = metric_series.min()

        print(f"DEBUG - Found event '{event_type}' at {event_timestamp} with value {extreme_value:.2f}{unit}")

        # 5. Analyze the context at that specific timestamp for all analysis variables
        event_date_str = event_timestamp.strftime('%Y-%m-%d')
        event_time_str = event_timestamp.strftime('%H:%M')
        detailed_contexts = {}
        for analysis_variable in analysis_variables:
            context_at_event = self.analyze_context_at_timestamp(
                variable_node=analysis_variable,
                query_term=self.node_to_name.get(analysis_variable, analysis_variable),
                date_str=event_date_str,
                time_str=event_time_str
            )
            if 'error' in context_at_event:
                return {'error': f"Error analyzing context for {analysis_variable} at event time: {context_at_event['error']}"}
            detailed_contexts[analysis_variable] = context_at_event

        # 6. Synthesize the final result
        variable_names = [self.node_to_name.get(var, var) for var in analysis_variables]
        return {
            'event_timestamp': event_timestamp,  # Used for plotting
            'conclusion': f"The moment of **{event_type}** occurred at **{event_time_str}**, reaching a value of **{extreme_value:.2f} {unit}**.",
            'evidence': {
                "Context at Event Time": f"Here is the detailed system state for the relevant variables ({', '.join(variable_names)}) at that moment:"
            },
            # Embed the full context analyses as a dictionary for the LLM
            'detailed_contexts': detailed_contexts
        }

    def _check_for_proactive_health_driver(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (DEFINITIVE V5 - Intelligent Event-Finding)
        Uses intelligent event-finding to locate the actual ventilation burst even if the user's
        timestamp is imprecise. Searches the entire nighttime window, finds the real burst,
        analyzes it, and politely corrects the user's timing in the response.
        """
        from datetime import datetime
        if control_action_node != 'uV' or not (timestamp.hour < 5 or timestamp.hour > 22):
            return None

        print("DEBUG (Proactive Health Check V5): Intelligent search for nighttime dehumidification burst...")

        # Get column names
        vent_col = self.node_to_column.get('uV')
        hum_col = self.node_to_column.get('H')
        h_lg_col = self.node_to_column.get('H_ieq')
        
        if not all(col in self.data.columns for col in [vent_col, hum_col, h_lg_col]):
            return None

        try:
            # --- INTELLIGENT EVENT-FINDING LOGIC ---
            # Define a WIDE nighttime search window to catch bursts even if user timing is off
            # Search from evening of query day through early morning of next day
            if timestamp.hour < 12:
                # Query is in early morning (like 00:05) - search previous evening through this morning
                search_start = datetime.combine(timestamp.date(), time(22, 0)) - timedelta(days=1)
                search_end = datetime.combine(timestamp.date(), time(6, 0))
            else:
                # Query is in evening/night - search this evening through next morning
                search_start = datetime.combine(timestamp.date(), time(22, 0))
                search_end = datetime.combine(timestamp.date() + timedelta(days=1), time(6, 0))
            
            print(f"DEBUG (Proactive Health Check V5): Searching window: {search_start} to {search_end}")
            
            # Get data for the search window (handle case where data might not extend that far)
            available_start = max(search_start, self.data.index.min())
            available_end = min(search_end, self.data.index.max())
            search_window_data = self.data.loc[available_start:available_end]

            if len(search_window_data) < 2:
                print("DEBUG (Proactive Health Check V5): Insufficient data in search window.")
                return None
            
            # Find ALL significant ventilation activity (> 0.01 to avoid numerical noise)
            significant_vent = search_window_data[search_window_data[vent_col] > 0.01]
            
            if not significant_vent.empty:
                # Found ventilation activity! Find the one closest to user's query time
                closest_burst = min(significant_vent.index, key=lambda x: abs((x - timestamp).total_seconds()))
                time_difference = abs((closest_burst - timestamp).total_seconds() / 3600)  # hours
                
                print(f"DEBUG (Proactive Health Check V5): Found burst at {closest_burst} ({time_difference:.1f} hours from query time)")
                
                # Get conditions before the burst
                event_idx = self.data.index.get_loc(closest_burst)
                if event_idx > 0:
                    data_before_event = self.data.iloc[event_idx - 1]
                    
                    humidity_before = data_before_event.get(hum_col)
                    h_lg_before = data_before_event.get(h_lg_col)
                    
                    is_humidity_high_risk = humidity_before > self.config['reasoning_thresholds']['humidity_critical_threshold']
                    is_optimizer_concerned = abs(h_lg_before) > self.config['lagrangian_active_threshold']['H_ieq']
                    
                    print(f"DEBUG (Proactive Health Check V5): Humidity={humidity_before:.1f}% (risk>{self.config['reasoning_thresholds']['humidity_critical_threshold']}%), H_ieq={h_lg_before:.2E}")
                    print(f"DEBUG (Proactive Health Check V5): High risk? {is_humidity_high_risk}, Optimizer concerned? {is_optimizer_concerned}")
                    
                    if is_humidity_high_risk and is_optimizer_concerned:
                        # Construct a response that corrects the user's timing if necessary
                        if time_difference > 0.5:  # More than 30 minutes off
                            timing_correction = f"The ventilation burst you're asking about actually occurred at {closest_burst.strftime('%H:%M')} (not exactly at {timestamp.strftime('%H:%M')}), and it"
                        else:
                            timing_correction = f"The ventilation burst at approximately {closest_burst.strftime('%H:%M')}"
                        
                        print("SUCCESS: Identified Proactive Health Management Scenario.")
                        return {
                            'confidence': 9.9,
                            'primary_reason': f"{timing_correction} was a pre-emptive action to manage a high risk of crop disease.",
                            'causal_driver_type': "Proactive Health Management",
                            'supporting_evidence': {
                                'Critical Condition': f"Just before the burst, humidity was at a high-risk level of {humidity_before:.1f}%, increasing the danger of condensation and fungal diseases (Botrytis risk).",
                                'Optimizer\'s Objective': f"The controller's internal state shows the Humidity Lagrangian was active at {h_lg_before:.2E}, providing direct proof that preventing humidity from exceeding its maximum limit was the immediate trigger.",
                                'Strategic Action': "A sharp burst of ventilation was used as a cost-effective 'purge' to rapidly bring humidity down to a safer level, preventing disease without excessive energy use.",
                                'Nighttime Timing': "This action occurred during nighttime hours when high humidity combined with cooler temperatures creates ideal conditions for fungal disease development."
                            }
                        }
                    else:
                        print(f"DEBUG (Proactive Health Check V5): Conditions not met for proactive health (humidity or optimizer not concerned)")
            else:
                print("DEBUG (Proactive Health Check V5): No significant ventilation bursts found in nighttime window.")
                # FALLBACK: If no nighttime bursts, expand search to the entire day
                print("DEBUG (Proactive Health Check V5): Expanding search to entire day...")
                day_data = self.data.loc[timestamp.date().strftime('%Y-%m-%d')]
                day_significant_vent = day_data[day_data[vent_col] > 0.01]
                
                if not day_significant_vent.empty:
                    closest_burst = min(day_significant_vent.index, key=lambda x: abs((x - timestamp).total_seconds()))
                    time_difference = abs((closest_burst - timestamp).total_seconds() / 3600)
                    
                    print(f"DEBUG (Proactive Health Check V5): Found burst on same day at {closest_burst} ({time_difference:.1f} hours from query time)")
                    
                    event_idx = self.data.index.get_loc(closest_burst)
                    if event_idx > 0:
                        data_before_event = self.data.iloc[event_idx - 1]
                        humidity_before = data_before_event.get(hum_col)
                        h_lg_before = data_before_event.get(h_lg_col)
                        
                        is_humidity_high_risk = humidity_before > self.config['reasoning_thresholds']['humidity_critical_threshold']
                        is_optimizer_concerned = abs(h_lg_before) > self.config['lagrangian_active_threshold']['H_ieq']
                        
                        print(f"DEBUG (Proactive Health Check V5): Humidity={humidity_before:.1f}%, H_ieq={h_lg_before:.2E}")
                        
                        # Even if conditions don't perfectly match "proactive health", provide explanation if burst exists
                        if closest_burst.hour >= 22 or closest_burst.hour < 6:
                            timing_desc = "nighttime"
                        else:
                            timing_desc = "daytime"
                        
                        timing_correction = f"The ventilation burst you're asking about actually occurred at {closest_burst.strftime('%H:%M')} (not at {timestamp.strftime('%H:%M')}), and it"
                        
                        print("SUCCESS: Found ventilation burst on same day.")
                        return {
                            'confidence': 8.5 if (is_humidity_high_risk or is_optimizer_concerned) else 7.0,
                            'primary_reason': f"{timing_correction} was activated to manage greenhouse climate conditions.",
                            'causal_driver_type': "Proactive Health Management" if is_humidity_high_risk else "Climate Management",
                            'supporting_evidence': {
                                'Actual Time': f"The burst occurred during {timing_desc} hours at {closest_burst.strftime('%H:%M')}.",
                                'Humidity Level': f"Humidity before the burst was {humidity_before:.1f}%{' (high risk for disease)' if is_humidity_high_risk else ''}.",
                                'Controller State': f"The Humidity Lagrangian was at {h_lg_before:.2E}, indicating {'active humidity management' if is_optimizer_concerned else 'routine operation'}.",
                                'Purpose': "Ventilation is used to control humidity levels and prevent conditions favorable for fungal diseases like Botrytis."
                            }
                        }
                else:
                    print("DEBUG (Proactive Health Check V5): No ventilation bursts found on this day.")
                
        except Exception as e:
            print(f"ERROR in Proactive Health Check V5: {e}")
            import traceback
            traceback.print_exc()
        
        return None

    def _check_for_aggressive_growth_strategy(self, control_action_node: str, timestamp: datetime) -> Optional[Dict[str, Any]]:
        """
        Detect a morning CO2-injection pattern that favours growth over energy savings.

        Applies only to the 'uC' and 'CO2_inj_ref' nodes. Data between 06:00 and
        12:00 on the query's date is examined. The strategy is reported when
        injection is significant (max > 0.1 or mean > 0.01) and the mean CO2
        level exceeds 600. The heating average is only logged, and the
        temperature column must exist but is not otherwise used.

        Returns:
            A dict with 'primary_reason', 'causal_driver_type' and
            'supporting_evidence', or None when the check does not apply, the
            pattern is absent, or an error occurred (errors are printed).
        """
        from datetime import datetime
        if control_action_node not in ['uC', 'CO2_inj_ref']:
            return None

        try:
            # Morning window (06:00-12:00) on the calendar day of the query
            query_day = timestamp.date()
            morning_start = datetime(query_day.year, query_day.month, query_day.day, 6)
            morning_end = datetime(query_day.year, query_day.month, query_day.day, 12)

            window_data = self.data.loc[morning_start:morning_end]
            if window_data.empty:
                return None

            # Columns backing CO2 injection, heating, CO2 level and temperature
            column_of = self.node_to_column.get
            co2_inj_col = column_of('uC')
            heat_col = column_of('uQh')
            co2_level_col = column_of('C')
            temp_col = column_of('T')

            for needed_col in (co2_inj_col, heat_col, co2_level_col, temp_col):
                if not needed_col or needed_col not in window_data.columns:
                    return None

            # Summary statistics over the morning window
            injection_series = window_data[co2_inj_col]
            max_injection = injection_series.max()
            avg_injection = injection_series.mean()
            avg_heating = window_data[heat_col].mean()
            avg_co2 = window_data[co2_level_col].mean()

            print(f"DEBUG (Aggressive Growth): Max CO2 inj: {max_injection:.6f}, Avg CO2 inj: {avg_injection:.6f}, Avg heating: {avg_heating:.6f}, Avg CO2: {avg_co2:.1f}")

            # Indicators: significant injection activity plus elevated CO2 during the morning
            is_aggressive_injection = max_injection > 0.1 or avg_injection > 0.01
            has_high_co2 = avg_co2 > 600

            print(f"DEBUG (Aggressive Growth): is_aggressive={is_aggressive_injection}, has_high_co2={has_high_co2}")

            if not (is_aggressive_injection and has_high_co2):
                return None

            evidence = {
                "Strategy Priority": f"CO2 injection was highly active (max {max_injection:.3f}, average {avg_injection:.3f}) to maintain elevated CO2 levels (average {avg_co2:.0f} ppm) for optimal photosynthesis.",
                "Growth Focus": "This represents a deliberate optimization choice favoring enhanced plant growth and yield through elevated CO2 concentrations during peak photosynthetic hours.",
                "Energy Consideration": "The system was willing to expend energy on CO2 injection to create supra-optimal growing conditions, prioritizing long-term yield over immediate energy conservation."
            }
            return {
                'primary_reason': "The system was executing an aggressive growth optimization strategy that prioritized maximizing photosynthesis over energy efficiency.",
                'causal_driver_type': "Growth Optimization Strategy",
                'supporting_evidence': evidence
            }

        except Exception as e:
            print(f"Error in aggressive growth strategy check: {e}")

        return None

    def _check_for_cooling_tradeoff_strategy(self, control_action_node: str, timestamp: datetime) -> Optional[Dict[str, Any]]:
        """
        Detect mechanical cooling being preferred over ventilation during high radiation.

        Applies only to the 'uQc' and 'cool_ref' nodes and only when the query
        falls between 12:00:00 and 18:00:00 (inclusive) on its own date. Over
        that afternoon window the strategy is reported when all of the
        following hold: mean radiation > 300, cooling is active
        (max > 1e-6 or mean > 1e-7), mean ventilation < 0.01 and mean
        temperature > 25.

        Args:
            control_action_node: Node identifier of the control action being explained.
            timestamp: Moment of the query; a datetime or pandas Timestamp.

        Returns:
            A dict with 'primary_reason', 'causal_driver_type' and
            'supporting_evidence', or None when the check does not apply, the
            pattern is absent, or an error occurred (errors are printed).
        """
        if control_action_node not in ['uQc', 'cool_ref']:
            return None

        try:
            # Define analysis window (afternoon when cooling might be needed).
            # replace() keeps every field that is not named, so the sub-second
            # parts must be cleared explicitly; otherwise both bounds drift by
            # the query's sub-second offset, dropping the 12:00:00 sample and
            # letting a query just after 18:00:00 pass the guard below.
            on_the_hour = {'minute': 0, 'second': 0, 'microsecond': 0}
            if hasattr(timestamp, 'nanosecond'):
                # pandas Timestamps carry an additional nanosecond field
                on_the_hour['nanosecond'] = 0
            afternoon_start = timestamp.replace(hour=12, **on_the_hour)
            afternoon_end = timestamp.replace(hour=18, **on_the_hour)

            if not (afternoon_start <= timestamp <= afternoon_end):
                return None  # Not afternoon hours

            window_data = self.data.loc[afternoon_start:afternoon_end]
            if window_data.empty:
                return None

            # Get relevant columns
            cool_col = self.node_to_column.get('uQc')
            vent_col = self.node_to_column.get('uV')
            qrad_col = self.node_to_column.get('Qrad')
            temp_col = self.node_to_column.get('T')

            if not all(col and col in window_data.columns for col in [cool_col, vent_col, qrad_col, temp_col]):
                return None

            # Window statistics for radiation, cooling, ventilation and temperature
            avg_qrad = window_data[qrad_col].mean()

            cooling_values = window_data[cool_col]
            max_cooling = cooling_values.max()
            avg_cooling = cooling_values.mean()

            avg_vent = window_data[vent_col].mean()
            avg_temp = window_data[temp_col].mean()

            print(f"DEBUG (Cooling Trade-off): Avg Qrad: {avg_qrad:.1f}, Max cooling: {max_cooling:.6f}, Avg cooling: {avg_cooling:.6f}, Avg vent: {avg_vent:.6f}, Avg temp: {avg_temp:.1f}")

            # Strategy indicators:
            # 1. High radiation (> 300 W/m² indicates potential plant stress)
            # 2. Active cooling being used (non-zero cooling)
            # 3. Relatively low ventilation (avoiding transpiration-based cooling)
            # 4. Elevated temperature

            is_high_radiation = avg_qrad > 300  # High radiation period
            is_active_cooling = max_cooling > 1e-6 or avg_cooling > 1e-7  # Active cooling engaged
            is_low_ventilation = avg_vent < 0.01  # Low ventilation to avoid transpiration
            is_high_temp = avg_temp > 25  # Elevated temperature requiring cooling

            print(f"DEBUG (Cooling Trade-off): high_rad={is_high_radiation}, active_cool={is_active_cooling}, low_vent={is_low_ventilation}, high_temp={is_high_temp}")

            if is_high_radiation and is_active_cooling and is_low_ventilation and is_high_temp:
                return {
                    'primary_reason': "The system chose active mechanical cooling over transpiration-based cooling through ventilation to protect plant health during high radiation stress.",
                    'causal_driver_type': "Cooling Mechanisms Trade-off",
                    'supporting_evidence': {
                        "Plant Protection Priority": f"During high radiation conditions (average {avg_qrad:.0f} W/m²), the system prioritized plant health over energy-efficient transpiration cooling.",
                        "Active Cooling Choice": f"Mechanical cooling was actively engaged (max {max_cooling:.4f}) while maintaining low ventilation (average {avg_vent:.4f}) to avoid excessive transpiration.",
                        "Risk Mitigation": f"At elevated temperatures (average {avg_temp:.1f}°C), the system chose controlled cooling to prevent heat stress rather than relying on potentially damaging evaporative cooling through open ventilation."
                    }
                }

        except Exception as e:
            print(f"Error in cooling trade-off check: {e}")

        return None

    def _check_for_defensive_override(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (DEFINITIVE EXPERT - V7)
        Checks for a defensive override by finding the PEAK constraint activity in a window
        around the event. This is robust to data granularity and volatility.
        """
        print("DEBUG (Defensive Override Expert V7): Checking for constraint-driven overrides...")

        try:
            # Define a window to find the peak constraint activity
            window_start = timestamp - timedelta(minutes=15)
            window_end = timestamp + timedelta(minutes=15)
            window_data = self.data.loc[window_start:window_end]

            event_idx = self.data.index.get_loc(self.find_nearest_timestamp(timestamp))
            if event_idx == 0: return None
            data_before_event = self.data.iloc[event_idx - 1]
            
            if window_data.empty: return None

        except Exception as e:
            print(f"DEBUG (Defensive Override Expert V7): Failed to get data. Error: {e}")
            return None

        control_col = self.node_to_column.get(control_action_node)
        if not control_col: return None

        # --- SIGNATURE 1: Was a beneficial action active just BEFORE the event? ---
        action_before = data_before_event.get(control_col, 0)
        is_action_active_before = action_before > 0.1
        
        if not is_action_active_before:
            return None

        # --- SIGNATURE 2: Was a corresponding STATE CONSTRAINT the dominant problem IN THE WINDOW? ---
        states_affected = self.get_states_typically_affected_by_control(control_action_node)
        
        for state_node in states_affected:
            lg_node = self.lagrangian_nodes.get(state_node, {}).get('ieq')
            if not lg_node: continue

            lg_col = self.node_to_column.get(lg_node)
            if not lg_col or lg_col not in window_data.columns: continue
            
            # <<< THE DEFINITIVE FIX IS HERE >>>
            # Find the PEAK Lagrangian value in the window around the event.
            peak_lg_val_in_window = window_data[lg_col].max()
            base_threshold = self.config['lagrangian_active_threshold'].get(lg_node, 1e-7)

            # The condition is now simple: was the peak problem in the window significant?
            is_constraint_the_problem = pd.notna(peak_lg_val_in_window) and peak_lg_val_in_window > base_threshold
            # <<< END OF FIX >>>

            print(f"DEBUG (Defensive Override Expert V7): Checking state '{state_node}'. Peak problem in window significant? {is_constraint_the_problem} (peak_val: {peak_lg_val_in_window:.3E}, threshold: {base_threshold:.1E})")

            if is_constraint_the_problem:
                
                confidence_score = 9.8 
                var_name = self.node_to_name.get(state_node, state_node)

                print(f"SUCCESS (Defensive Override Expert V7): Found override for '{control_action_node}' due to significant {var_name} upper limit in the event window.")
                return {
                    'confidence': confidence_score,
                    'primary_reason': f"The controller executed a defensive override, sharply cutting the action to prevent the {var_name} from violating its critical upper safety limit.",
                    'causal_driver_type': "Defensive Override (Constraint Priority)",
                    'supporting_evidence': {
                        'The Conflict': f"Although conditions were ideal for continuing the action, the {var_name} had reached its maximum allowed level, forcing the system to intervene.",
                        'Optimizer\'s Command': f"The Lagrangian multiplier for {var_name} was highly active around this time (peaking at {peak_lg_val_in_window:.2E}), which is the optimizer's definitive command to stop or reduce any action that would cause a further increase.",
                        'Strategic Decision': "This demonstrates a key principle of the controller's strategy: safety and stability constraints always take priority over opportunistic or economic goals."
                    }
                }
                
        return None
    
    def _check_for_jittery_controller(self, control_action_node: str, timestamp: datetime) -> Optional[Dict[str, Any]]:
        """
        Detect rapid, oscillating adjustments of a control action around `timestamp`.

        The control signal within +/-2 hours of the query is analysed (at least
        10 samples are required). Jitter is reported when more than 20% of the
        step-to-step changes exceed half the signal's standard deviation AND
        either the coefficient of variation is above 0.5 or more than five
        such changes occurred.

        Returns:
            A dict with 'primary_reason', 'causal_driver_type' and
            'supporting_evidence', or None when there is too little data, the
            column is unknown, no jitter is found, or an error occurred
            (errors are printed).
        """
        try:
            # Four-hour analysis window centred on the query time
            reach = pd.Timedelta(hours=2)
            window_data = self.data.loc[timestamp - reach:timestamp + reach]
            if len(window_data) < 10 or window_data.empty:
                return None

            control_col = self.node_to_column.get(control_action_node)
            if not control_col or control_col not in window_data.columns:
                return None

            control_values = window_data[control_col]

            # Absolute step-to-step changes of the control signal
            control_changes = control_values.diff().abs()
            avg_change = control_changes.mean()
            max_change = control_changes.max()

            # Spread of the signal drives both the CV and the change threshold
            spread = control_values.std()
            has_spread = spread > 0
            cv = spread / abs(control_values.mean()) if has_spread else 0
            threshold = spread * 0.5 if has_spread else 0.01

            # Share of steps whose change exceeds the threshold
            significant_changes = (control_changes > threshold).sum()
            change_frequency = significant_changes / len(control_changes)

            print(f"DEBUG (Jittery Controller): Avg change: {avg_change:.6f}, Max change: {max_change:.6f}, CV: {cv:.3f}, Change freq: {change_frequency:.3f}")

            # Indicators: frequent significant changes, high relative variability, many large changes
            is_high_frequency_changes = change_frequency > 0.2
            is_high_variability = cv > 0.5
            is_multiple_large_changes = significant_changes > 5

            print(f"DEBUG (Jittery Controller): high_freq={is_high_frequency_changes}, high_var={is_high_variability}, multiple_changes={is_multiple_large_changes}")

            if not (is_high_frequency_changes and (is_high_variability or is_multiple_large_changes)):
                return None

            evidence = {
                "Oscillatory Pattern": f"The control action showed frequent significant changes ({significant_changes} times) with high variability (CV: {cv:.2f}).",
                "Parameter Sensitivity": f"Rapid adjustments (average change: {avg_change:.4f}, max change: {max_change:.4f}) indicate the controller was struggling with competing optimization objectives.",
                "Conflicting Objectives": "The jittery behavior suggests the system was trying to balance multiple competing goals simultaneously, leading to unstable control decisions."
            }
            return {
                'primary_reason': "The controller exhibited jittery behavior with rapid, oscillatory adjustments due to conflicting objectives and high parameter sensitivity.",
                'causal_driver_type': "Jittery Controller",
                'supporting_evidence': evidence
            }

        except Exception as e:
            print(f"Error in jittery controller check: {e}")

        return None

    def _check_for_causal_opportunity_driver(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (V2 - EVIDENCE SCORING)
        Identifies situations where ventilation was used to exploit a "free"
        environmental resource for heating or cooling.
        """
        # This expert is only relevant when ventilation is the action in question.
        if 'uV' not in control_action_node:
            return None
            
        print("DEBUG (Causal Opportunity Expert): Checking for free heating/cooling resources...")
        
        try:
            data_row = self.data.loc[self.find_nearest_timestamp(timestamp)]
        except Exception:
            return None

        t_in = data_row.get(self.node_to_column.get('T'))
        t_out = data_row.get(self.node_to_column.get('Tout'))
        teq_lg = data_row.get(self.node_to_column.get('T_eq'))

        if not all(pd.notna(v) for v in [t_in, t_out, teq_lg]):
            return None

        # --- Scenario 1: "Free Heating" via Ventilation ---
        # Condition: Outside air is warmer, AND the controller wants to heat up.
        if t_out > t_in and teq_lg < -0.05: # Strong signal to INCREASE temp
            # SCORING: High confidence for this sophisticated maneuver.
            temp_diff_bonus = min(2.0, t_out - t_in)
            optimizer_effort_bonus = min(3.0, abs(teq_lg) * 10)
            confidence_score = 4.0 + temp_diff_bonus + optimizer_effort_bonus # Base 4, max 9.0
            
            return {
                'confidence': confidence_score,
                'primary_reason': "This was an advanced energy-saving strategy to 'kickstart' the morning warm-up using warmer outside air.",
                'causal_driver_type': "Causal Opportunity (Free Heating)",
                'supporting_evidence': {
                    'The Goal': f"The controller's primary objective was to rapidly increase the indoor temperature, proven by the massive negative spike in the Temperature Equality Lagrangian ({teq_lg:.2f}).",
                    'The Opportunity': f"It identified that the outside air ({t_out:.1f}°C) was momentarily warmer than the inside air ({t_in:.1f}°C), making it a source of 'free' heat.",
                    'The Action': "Therefore, it used ventilation as a cost-effective heating tool to achieve its goal."
                }
            }

        # --- Scenario 2: "Free Cooling" via Ventilation ---
        # Condition: Outside air is cooler, AND the controller wants to cool down.
        if t_out < t_in and teq_lg > 0.05: # Strong signal to DECREASE temp
            # SCORING: High confidence, as this is the primary, most economical way to cool.
            temp_diff_bonus = min(2.0, t_in - t_out)
            optimizer_effort_bonus = min(3.0, abs(teq_lg) * 10)
            confidence_score = 4.0 + temp_diff_bonus + optimizer_effort_bonus # Base 4, max 9.0
            
            return {
                'confidence': confidence_score,
                'primary_reason': "This was an energy-efficient strategy to cool the greenhouse using cooler outside air.",
                'causal_driver_type': "Causal Opportunity (Free Cooling)",
                'supporting_evidence': {
                    'The Goal': f"The controller's objective was to lower the indoor temperature, proven by the strong positive signal from the Temperature Equality Lagrangian ({teq_lg:.2f}).",
                    'The Opportunity': f"It identified that the outside air ({t_out:.1f}°C) was significantly cooler than the inside air ({t_in:.1f}°C), making it the most economical cooling resource.",
                    'The Action': "Therefore, it activated ventilation as the primary cooling method."
                }
            }

        return None

    def analyze_setpoint_strategy(self, setpoint_node: str, timestamp: datetime) -> Dict[str, Any]:
        """
        (GENERALIZED V2)
        Analyzes whether a setpoint was necessary and effective by checking for multiple
        known operational strategies (e.g., aggressive growth, energy saving, etc.).
        """
        print(f"DEBUG (Setpoint Strategy Analysis): Evaluating {setpoint_node} at {timestamp}.")
        
        try:
            data_row = self.data.loc[self.find_nearest_timestamp(timestamp)]
            data_before = self.data.iloc[self.data.index.get_loc(self.find_nearest_timestamp(timestamp)) - 1]
        except Exception as e:
            return {'error': f"Could not retrieve data for analysis: {e}"}

        base_state_node = setpoint_node.replace('_ref', '') # 'C_ref' -> 'C'
        setpoint_col = self.node_to_column.get(setpoint_node)
        # --- Gather all potentially relevant evidence from the data row ---
        # Optimizer State
        bieq_lg = data_row.get(self.node_to_column.get('B_ieq'))
        state_ieq_lg = data_row.get(self.node_to_column.get(f"{base_state_node}_ieq"))
        
        # Physical State & Fluxes
        state_val = data_row.get(self.node_to_column.get(base_state_node))
        setpoint_val = data_row.get(self.node_to_column.get(setpoint_node))
        state_to_flux_map = {'C': 'C_phot', 'T': 'Q_cool', 'H': 'H_trans'}
        flux_node = state_to_flux_map.get(base_state_node)
        flux_val = data_row.get(self.node_to_column.get(flux_node))
        # Environmental Context
        qrad = data_row.get(self.node_to_column.get('Qrad'))
        tout = data_row.get(self.node_to_column.get('Tout'))
        
        # ==============================================================================
        # --- SCENARIO 1 (NEW & HIGH-PRIORITY): Day-to-Night Humidity Drop for Disease Prevention ---
        # ==============================================================================
        if base_state_node == 'H' and (17 <= timestamp.hour <= 19):
            try:
                # --- EVENT-FINDING LOGIC ---
                # Define a search window around the user's specified time
                search_start = timestamp - timedelta(minutes=30)
                search_end = timestamp + timedelta(minutes=30)
                
                search_window_data = self.data.loc[search_start:search_end, setpoint_col]

                if len(search_window_data) < 2:
                    raise ValueError("Not enough data in search window.")

                # Find the largest single drop (most negative change) in this window.
                changes = search_window_data.diff()
                if changes.min() < -20: # Ensure there is at least one large drop
                    # Get the timestamp where the largest drop FINISHED.
                    event_timestamp = changes.idxmin()
                    
                    # Get the values right before and at the actual event.
                    event_index_loc = self.data.index.get_loc(event_timestamp)
                    setpoint_at_drop = self.data.iloc[event_index_loc][setpoint_col]
                    setpoint_before_drop = self.data.iloc[event_index_loc - 1][setpoint_col]

                    # --- END OF EVENT-FINDING LOGIC ---

                    return {
                        'strategy_name': "Proactive Disease Prevention (Day-to-Night Transition)",
                        'conclusion': f"Yes, you are correct, there was a sharp drop for disease prevention. It occurred at {event_timestamp.strftime('%H:%M')}, a few minutes before the time you mentioned.",
                        'evidence': {
                            "The Goal (Health Rationale)": "This is the controller's standard 'end-of-day' procedure. By forcing humidity down, it prevents condensation on the plants as temperatures cool, which is critical for preventing fungal diseases like botrytis.",
                            "Proof of Action": f"The humidity setpoint was commanded to drop sharply from {setpoint_before_drop:.1f}% down to {setpoint_at_drop:.1f}%.",
                            "Effectiveness": "This proactive strategy is highly effective at ensuring long-term crop health by creating an unfavorable environment for pathogens during the vulnerable nighttime period."
                        }
                    }
            except Exception as e:
                print(f"DEBUG (Setpoint Strategy V5): Event search failed for humidity drop. Error: {e}")
                pass # Fall through to other checks
            
        # ==============================================================================
        # --- SCENARIO 2: Aggressive Growth (High CO2 Setpoint) ---
        # ==============================================================================
        if base_state_node == 'C':
            is_goal_growth = (pd.notna(bieq_lg) and bieq_lg < -1e-9) and \
                            (pd.notna(setpoint_val) and setpoint_val > 800)
            is_light_missing = pd.notna(qrad) and qrad < 50
            
            if is_goal_growth and is_light_missing:
                # This is the "Flawed Strategy" pattern!
                return {
                    'strategy_name': "Flawed Aggressive Growth",
                    'conclusion': "The high CO₂ setpoint was part of an attempted growth strategy that was rendered ineffective by a sudden, unexpected loss of sunlight.",
                    'evidence': {
                        "Controller's Goal": f"The controller was clearly aiming for aggressive growth, proven by the high CO₂ setpoint ({setpoint_val:.1f} ppm) and the profit-driven signal from the Biomass Lagrangian ({bieq_lg:.2E}).",
                        "Contradictory Reality": f"However, this strategy was physically impossible at that moment because there was no usable solar radiation (Iout was {qrad:.1f} W/m²).",
                        "Diagnosis": "This indicates a lag in the controller's reaction to the sudden change in weather or a potential flaw in its predictive model, causing it to waste resources."
                    }
                }

        # ==============================================================================
        # --- SCENARIO 3: Proactive Energy Saving (Low Temperature Setpoint at Night) ---
        # ==============================================================================
        if base_state_node == 'T' and timestamp.hour > 20: # Check for nighttime
            is_setpoint_low = pd.notna(setpoint_val) and setpoint_val < 18 # Low temp target
            is_tout_low = pd.notna(tout) and tout < 15 # Cold outside
            is_optimizer_relaxed = pd.notna(state_ieq_lg) and abs(state_ieq_lg) < 1e-8 # Not fighting a limit

            if is_setpoint_low and is_tout_low and is_optimizer_relaxed:
                return {
                    'strategy_name': "Proactive Energy Conservation",
                    'conclusion': "Yes, the low temperature setpoint was a necessary and effective energy-saving strategy.",
                    'evidence': {
                        "The Goal (Economic Rationale)": "The controller intentionally lowered the temperature setpoint to minimize heating costs during the cold night, when plant growth is minimal.",
                        "Environmental Pressure": f"This was a sensible strategy given the low outside temperature of {tout:.1f}°C, which would require significant energy to counteract.",
                        "System State": f"The system was stable at this lower setpoint ({setpoint_val:.1f}°C), proven by the near-zero Temperature Lagrangian ({state_ieq_lg:.2E}), indicating it was an efficient equilibrium."
                    }
                }

        # ==============================================================================
        # --- SCENARIO 4: Disease Prevention (Low Humidity Setpoint at Night) ---
        # ==============================================================================
        if base_state_node == 'H' and timestamp.hour > 20:
            is_setpoint_low = pd.notna(setpoint_val) and setpoint_val < 75 # Dry target
            is_plant_transpiring = pd.notna(flux_val) and flux_val > 0.01 # Check H_trans
            is_optimizer_working = pd.notna(state_ieq_lg) and state_ieq_lg < -1e-9 # Fighting lower limit

            if is_setpoint_low and not is_plant_transpiring and is_optimizer_working:
                return {
                    'strategy_name': "Proactive Disease Prevention",
                    'conclusion': "Yes, the low humidity setpoint was a necessary strategy to protect crop health.",
                    'evidence': {
                        "The Goal (Health Rationale)": "The controller lowered the humidity setpoint to create a drier environment overnight. This is a critical strategy to prevent condensation on the plants and reduce the risk of fungal diseases like botrytis.",
                        "Biological Context": "This is especially important at night when plants are not actively transpiring (H_trans was near zero), which can lead to stagnant, humid air pockets.",
                        "Proof of Effort": f"The Humidity Lagrangian was strongly negative ({state_ieq_lg:.2E}), proving the controller was actively working to achieve and maintain this dry, safe state."
                    }
                }

        # --- FALLBACK SCENARIO ---
        return {
            'strategy_name': "Standard Operation",
            'conclusion': "The setpoint appears to be part of a standard operational procedure and not a special, high-level strategy.",
            'evidence': {
                "Setpoint Value": f"The setpoint for {self.node_to_name.get(base_state_node)} was set to {setpoint_val:.2f}.",
                "System State": "The controller was likely tracking this reference as part of its normal routine to maintain a stable climate."
            }
        }

    def format_setpoint_strategy_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Render a setpoint-strategy analysis dict as plain text for the LLM.

        When the dict carries an 'error' key, a single error line is returned.
        Otherwise the text consists of a header naming 'strategy_name', a
        verdict line built from 'conclusion', and - if 'evidence' is present
        and non-empty - one bullet per evidence entry.
        """
        if 'error' in analysis_result:
            return f"Error during analysis: {analysis_result['error']}"

        lines = [
            f"--- Analysis of Setpoint Strategy: {analysis_result['strategy_name']} ---",
            f"\n**Verdict:** {analysis_result['conclusion']}",
        ]

        evidence = analysis_result.get('evidence')
        if evidence:
            lines.append("\n**Supporting Evidence:**")
            lines.extend(f"- **{label}:** {detail}" for label, detail in evidence.items())

        return "\n".join(lines)

    def _check_for_free_dehumidification(self, control_action_node: str, timestamp: datetime) -> Optional[Dict[str, Any]]:
        """
        Detect "free dehumidification": ventilation running while cooling is
        needed, humidity falling, and mechanical cooling staying near zero.

        The check only applies to the 'uV' / 'Vent_ref' nodes and looks at a
        window of one hour either side of `timestamp`. An explanation dict is
        returned when all four indicators hold; in every other case (wrong
        node, empty window, missing columns, indicators not met, or any
        exception, which is printed) the result is None.
        """
        if control_action_node not in ('uV', 'Vent_ref'):
            return None

        try:
            # Two-hour analysis window centred on the requested timestamp.
            half_span = pd.Timedelta(hours=1)
            window_data = self.data.loc[timestamp - half_span:timestamp + half_span]
            if window_data.empty:
                return None

            # Resolve the four columns this check depends on; bail out if any is unavailable.
            vent_col, temp_col, hum_col, cool_col = (
                self.node_to_column.get(node) for node in ('uV', 'T', 'H', 'uQc')
            )
            for col in (vent_col, temp_col, hum_col, cool_col):
                if not col or col not in window_data.columns:
                    return None

            vent_series = window_data[vent_col]
            temp_series = window_data[temp_col]
            hum_series = window_data[hum_col]
            cool_series = window_data[cool_col]

            max_vent = vent_series.max()
            avg_vent = vent_series.mean()
            avg_temp = temp_series.mean()
            temp_trend = temp_series.diff().mean()  # mean step-to-step temperature change
            avg_hum = hum_series.mean()
            hum_change = hum_series.iloc[-1] - hum_series.iloc[0]  # net change across the window
            avg_cooling = cool_series.mean()

            print(f"DEBUG (Free Dehumidification): Max vent: {max_vent:.6f}, Avg vent: {avg_vent:.6f}, Avg temp: {avg_temp:.1f}, Temp trend: {temp_trend:.3f}, Avg hum: {avg_hum:.1f}, Hum change: {hum_change:.1f}, Avg cooling: {avg_cooling:.6f}")

            # The four indicators that together define the pattern.
            is_significant_ventilation = max_vent > 0.01 or avg_vent > 0.005
            is_cooling_needed = avg_temp > 25 or temp_trend > 0.1
            is_humidity_reduced = hum_change < -2.0
            is_low_active_cooling = avg_cooling < 0.001

            print(f"DEBUG (Free Dehumidification): sig_vent={is_significant_ventilation}, cool_needed={is_cooling_needed}, hum_reduced={is_humidity_reduced}, low_active_cool={is_low_active_cooling}")

            if not (is_significant_ventilation and is_cooling_needed and is_humidity_reduced and is_low_active_cooling):
                return None

            supporting_evidence = {
                "Primary Purpose": f"Ventilation was activated (max {max_vent:.4f}) primarily for cooling under elevated temperature conditions (average {avg_temp:.1f}°C).",
                "Beneficial Side Effect": f"The ventilation also achieved significant dehumidification (humidity reduction of {abs(hum_change):.1f}%) without requiring additional active systems.",
                "Energy Efficiency": f"Minimal mechanical cooling was used (average {avg_cooling:.4f}), allowing ventilation to serve dual purposes efficiently."
            }
            return {
                'primary_reason': "The system used ventilation for cooling while achieving dehumidification as a beneficial side effect, representing efficient multi-objective control.",
                'causal_driver_type': "Free Dehumidification",
                'supporting_evidence': supporting_evidence
            }

        except Exception as e:
            print(f"Error in free dehumidification check: {e}")

        return None
    
    def _check_for_sunrise_kickstart_strategy(self, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (NEW, HIGH-PRIORITY EXPERT)
        Looks for the coordinated 'sunrise kickstart' pattern where heating and CO2 injection
        are used together just before sunrise to prepare for a productive day.
        """
        # This highly specific pattern only occurs in the pre-dawn hours.
        if not (4 <= timestamp.hour < 7):
            return None

        print("DEBUG (Sunrise Kickstart Expert): Checking for coordinated pre-dawn action...")
        
        try:
            # We need a small window to see the spike
            window_start = timestamp - timedelta(minutes=10)
            window_end = timestamp + timedelta(minutes=10)
            window_data = self.data.loc[window_start:window_end]
            if window_data.empty: return None
            data_at_event = self.data.loc[self.find_nearest_timestamp(timestamp)]
        except Exception:
            return None

        # --- Define the Signature of the Kickstart Strategy ---
        
        # 1. Was there a sharp CO2 injection spike?
        co2_inj_col = self.node_to_column.get('uC')
        co2_spike_threshold = 0.5 # A significant injection value
        is_co2_spike = window_data[co2_inj_col].max() > co2_spike_threshold
        
        # 2. Was the heating system active?
        heat_col = self.node_to_column.get('uQh')
        # Check the absolute value to handle potential negative reference signals
        is_heating_active = abs(data_at_event.get(heat_col, 0)) > 1e-10

        # 3. Was it dark? (Confirming it's pre-sunrise)
        qrad_col = self.node_to_column.get('Qrad')
        is_dark = data_at_event.get(qrad_col, 100) < 10

        # 4. (Confirmation) Was the optimizer's goal to increase Temperature and CO2?
        tieq_lg = data_at_event.get(self.node_to_column.get('T_ieq'))
        ceq_lg = data_at_event.get(self.node_to_column.get('C_eq'))
        is_goal_to_heat = pd.notna(tieq_lg) and tieq_lg < -1e-10
        is_goal_to_add_co2 = pd.notna(ceq_lg) and ceq_lg < -0.1
        
        print(f"DEBUG (Kickstart Expert): CO2 Spike? {is_co2_spike}, Heating Active? {is_heating_active}, Dark? {is_dark}, Goal to Heat? {is_goal_to_heat}, Goal for CO2? {is_goal_to_add_co2}")

        # --- Evaluate and Score ---
        if is_co2_spike and is_heating_active and is_dark and is_goal_to_heat and is_goal_to_add_co2:
            # This is a very specific, multi-faceted pattern. Give it a very high confidence score.
            confidence_score = 9.8 
            print(f"SUCCESS (Sunrise Kickstart Expert): Identified strategy with score {confidence_score}")
            return {
                'confidence': confidence_score,
                'primary_reason': "This was a coordinated and predictive 'sunrise kickstart' strategy to prepare the greenhouse for a highly productive day.",
                'causal_driver_type': "Coordinated Opportunistic Action",
                'supporting_evidence': {
                    'Strategic Goal': "The controller's objective was to maximize the benefit of the imminent sunrise. It aimed to have both temperature and CO₂ at optimal levels the moment photosynthesis could begin.",
                    'Coordinated Actions': f"To achieve this, it simultaneously injected a large amount of CO₂ (peaking at {window_data[co2_inj_col].max():.2f}) while activating the heating system.",
                    'Optimizer State': f"This coordinated strategy is proven by the optimizer's internal state: the Temperature Lagrangian (Tieq_lg: {tieq_lg:.2E}) and CO₂ Lagrangian (Ceq_lg: {ceq_lg:.2E}) were both strongly negative, signaling a demand to increase both variables.",
                    'Economic Rationale': "This action represents an investment of energy before sunrise to maximize the conversion of free solar energy into profitable biomass throughout the day."
                }
            }
            
        return None
  

    def analyze_profitability_strategy(self, date: datetime.date) -> Dict[str, Any]:
        """
        (ROBUST V2) Analyzes the economic trade-off between running cooling and CO2 injection
        simultaneously, using PEAK conditions, not averages.

        The analysis covers the 12:00-18:00 window of `date` and evaluates
        three flags:
          * strategy active - mean cooling > 1e-5 and mean CO2 injection > 0.1
          * opportunity     - peak solar radiation > reasoning_thresholds['Qrad_high']
          * successful      - biomass reference rose by more than 0.05 over the window

        Returns:
            A dict with 'conclusion', 'explanation' and 'evidence'. When the
            window has no data, slicing fails, or one of the required columns
            ('uQc', 'uC', 'Qrad', 'B_ref') is unmapped or absent, a dict with
            a single 'error' key is returned instead.

        Raises:
            KeyError: if 'Qrad_high' is missing from `self.reasoning_thresholds`.
        """
        # Local import keeps `datetime` bound to the class even though the
        # `date` parameter shadows the module-level `date` name.
        from datetime import datetime
        print(f"DEBUG (Profitability Expert V2): Analyzing Cooling vs. Growth strategy for {date}.")

        try:
            afternoon_start = datetime.combine(date, time(12, 0))
            afternoon_end = datetime.combine(date, time(18, 0))
            window_data = self.data.loc[afternoon_start:afternoon_end]
            if window_data.empty:
                return {'error': "No data available for the afternoon period to analyze."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        # --- Gather the evidence for the strategy ---
        cool_col = self.node_to_column.get('uQc')
        co2_inj_col = self.node_to_column.get('uC')
        qrad_col = self.node_to_column.get('Qrad')
        bio_ref_col = self.node_to_column.get('B_ref')

        # Report missing columns through the same {'error': ...} channel used for
        # data-access problems, rather than letting a KeyError escape.
        required_cols = (cool_col, co2_inj_col, qrad_col, bio_ref_col)
        if any(not col or col not in window_data.columns for col in required_cols):
            return {'error': "Required data columns not found for profitability analysis."}

        # 1. Were both systems active?
        avg_cooling = window_data[cool_col].mean()
        avg_co2_inj = window_data[co2_inj_col].mean()
        # bool(...) converts numpy booleans so the evidence dict holds plain Python values.
        is_strategy_active = bool(avg_cooling > 1e-5 and avg_co2_inj > 0.1)

        # 2. Was there a profitable opportunity?
        # Use the PEAK solar radiation rather than the average, so days with
        # periods of intense sun still count even if cloudy overall.
        qrad_high = self.reasoning_thresholds['Qrad_high']
        peak_qrad = window_data[qrad_col].max()
        is_opportunity = bool(peak_qrad > qrad_high)

        # 3. Was the strategy actually successful?
        bio_start = window_data[bio_ref_col].iloc[0]
        bio_end = window_data[bio_ref_col].iloc[-1]
        biomass_gain = bio_end - bio_start
        is_successful = bool(biomass_gain > 0.05)

        # --- Logging for better debugging ---
        print(f"DEBUG (Profitability Expert V2): Strategy Active? {is_strategy_active}")
        print(f"DEBUG (Profitability Expert V2): Opportunity? {is_opportunity} (Peak Iout: {peak_qrad:.1f} vs Threshold: {qrad_high})")
        print(f"DEBUG (Profitability Expert V2): Successful? {is_successful} (Biomass Gain: {biomass_gain:.2f})")

        # --- Synthesize the explanation ---
        if is_strategy_active and is_opportunity and is_successful:
            return {
                'conclusion': "Yes, this is a highly profitable and optimal strategy for a high-light day.",
                'explanation': (
                    "The two systems are not fighting each other; they are performing two independent and necessary tasks to capitalize on the same profitable opportunity: intense afternoon sun. "
                    "The controller has correctly calculated that the economic gain from the biomass produced during these peak sunlight hours is far greater than the combined cost of the electricity for cooling and the price of the injected CO₂."
                ),
                'evidence': {
                    "The Opportunity": f"The strategy was driven by periods of intense solar radiation (peaking at {peak_qrad:.1f} W/m²), which created perfect windows for maximum photosynthesis.",
                    "Necessary Task 1 (Cooling)": "The intense sun created a heavy heat load, making active cooling necessary to prevent crop damage from heat stress.",
                    "Necessary Task 2 (CO₂ Injection)": "To convert the abundant light into biomass, the plants required a high level of CO₂, which was supplied by the injection system.",
                    "Proof of Profitability": f"The strategy was successful, resulting in a significant biomass gain of {biomass_gain:.2f} units during this period."
                }
            }

        # Fallback: at least one of the three flags did not hold.
        return {
            'conclusion': "The profitability of this strategy is questionable under these conditions.",
            'explanation': "The system was running both cooling and CO2 injection, but the conditions did not fully justify this high-cost strategy, or the outcome was not significantly profitable.",
            'evidence': {
                "Strategy Active?": is_strategy_active,
                "High-Light Opportunity?": is_opportunity,
                "Was it Successful?": is_successful
            }
        }
    
    def analyze_control_efficiency(self, control_node: str, date: datetime.date) -> Dict[str, Any]:
        """
        Analyzes whether a control action was efficient/wasteful during specific conditions.
        Focuses on CO2 injection during low-light/cloudy periods.

        Only `control_node == 'uC'` is analysed in detail. For that node the
        method counts the rows of the day where injection exceeded 0.1 and how
        many of those had solar radiation below the 'Qrad_photosynthesis'
        threshold (default 200). The tolerated share of such "low light"
        injection depends on how volatile the 06:00-18:00 radiation was
        (coefficient of variation > 0.4 counts as volatile): below 40% is
        acceptable on a volatile day, below 15% on a stable day.

        Returns:
            A dict with 'conclusion', 'explanation' and 'evidence'; or a dict
            with a single 'error' key when the day has no data, the data
            cannot be accessed, or the control / radiation column is unmapped.
            Any other control node yields a "not yet implemented" result.
        """
        # Local import keeps `datetime` bound to the class even though the
        # `date` parameter shadows the module-level `date` name.
        from datetime import datetime
        print(f"DEBUG (Efficiency Analysis): Analyzing efficiency of {control_node} on {date}.")

        try:
            # Get full day data
            start_dt = datetime.combine(date, time(0, 0))
            end_dt = datetime.combine(date, time(23, 59))
            day_data = self.data.loc[start_dt:end_dt]
            if day_data.empty:
                return {'error': "No data available for the specified date."}
        except Exception as e:
            return {'error': f"Error accessing data: {e}"}

        # Get column names
        control_col = self.node_to_column.get(control_node)
        qrad_col = self.node_to_column.get('Qrad')

        if not control_col or not qrad_col:
            return {'error': f"Required data columns not found for {control_node} analysis."}

        # Only CO2 injection has a dedicated analysis; everything else gets a generic answer.
        if control_node != 'uC':
            return {
                'conclusion': f"Analysis of {control_node} efficiency is not yet implemented.",
                'explanation': f"The system can analyze CO2 injection efficiency, but {control_node} efficiency analysis is not available.",
                'evidence': {
                    "Available Analysis": "CO2 injection efficiency during cloudy periods",
                    "Requested Control": control_node
                }
            }

        # Define low-light periods (cloudy conditions)
        low_light_threshold = self.reasoning_thresholds.get('Qrad_photosynthesis', 200)  # W/m²

        # Find periods where CO2 was injected, and the subset that happened during low light
        co2_active_periods = day_data[day_data[control_col] > 0.1]
        low_light_periods = co2_active_periods[co2_active_periods[qrad_col] < low_light_threshold]

        # Both counts are numbers of data rows, not minutes.
        total_co2_injection_time = len(co2_active_periods)
        wasteful_co2_time = len(low_light_periods)

        if total_co2_injection_time == 0:
            return {
                'conclusion': "No CO2 injection occurred on this date.",
                'explanation': "The CO2 injection system was not active at all during this day.",
                'evidence': {
                    "Total CO2 Injection Time": "0 minutes",
                    "Analysis": "Cannot assess efficiency when no injection occurred."
                }
            }

        # 1. Classify the day by the volatility of daytime solar radiation.
        daytime_data = day_data.between_time('06:00', '18:00')
        iout_series = daytime_data[qrad_col]
        mean_iout = iout_series.mean()
        # A mean of 1 or less (or NaN for an empty daytime slice) is treated as zero volatility.
        iout_volatility_cv = iout_series.std() / mean_iout if mean_iout > 1 else 0
        is_volatile_day = iout_volatility_cv > 0.4

        # 2. Now, interpret the "waste" in the context of the day's volatility.
        waste_percentage = (wasteful_co2_time / total_co2_injection_time) * 100

        if is_volatile_day:
            # On a VOLATILE day, some "waste" is expected as part of an aggressive tracking strategy.
            if waste_percentage < 40:  # Allow a higher tolerance for waste
                conclusion = "No, the CO₂ injection was part of an efficient and aggressive tracking strategy."
                explanation = ("On a highly volatile day with inconsistent sunlight, the controller must predict when the sun will return. "
                            "Keeping CO₂ levels high during brief cloudy periods is a deliberate strategy to ensure the plants can immediately photosynthesize "
                            "when the sun reappears. This 'waste' is a necessary cost for maximizing growth under challenging conditions.")
            else:  # A very high percentage of waste is still bad.
                conclusion = "Yes, the CO₂ injection was largely wasteful on this volatile day."
                explanation = ("While some inefficiency is expected on a cloudy day, a very high percentage of CO₂ was injected during periods "
                            "with insufficient light, suggesting the controller's predictive model was not tracking the weather effectively.")
        else:
            # On a STABLE day, any waste is less excusable.
            if waste_percentage < 15:  # Use a stricter threshold for waste
                conclusion = "CO₂ injection was reasonably efficient on this date."
                explanation = ("Most CO₂ injection occurred during adequate light conditions. The small amount of injection during lower light "
                                "periods is within acceptable operational limits.")
            else:
                conclusion = "Yes, CO₂ injection was inefficient on this date."
                explanation = ("A significant portion of CO₂ was injected during periods of low light, which indicates a potential "
                                "tuning issue or a lagging response from the controller on this otherwise stable day.")

        return {
            'conclusion': conclusion,
            'explanation': explanation,
            'evidence': {
                "Day Type": "Volatile / Inconsistent Sunlight" if is_volatile_day else "Stable Sunlight",
                "Total CO₂ Injection Intervals": f"{total_co2_injection_time}",
                "Injection Intervals During Low Light": f"{wasteful_co2_time} ({waste_percentage:.1f}%)",
                "Efficiency Ratio": f"{(100 - waste_percentage):.1f}% of injection time had adequate light"
            }
        }
    
    # NOTE: callers must pass the user's original query text as 'original_query'.
    def _check_for_inaction_due_to_conflict(self, control_action_node: str, timestamp: datetime, original_query: str) -> Dict[str, Any] | None:
        """
        (V2 - Context-Aware)
        Checks if a control was inactive due to a conflict, but ONLY if the user's query
        is explicitly about inaction (e.g., "why was it off?").
        """
        
        # --- STEP 1: ADD THE CONTEXTUAL GUARD CLAUSE ---
        inaction_keywords = ["why didn't", "why was it not", "why was it off", "why was it zero", "why was it inactive"]
        query_lower = original_query.lower()
        
        # Check if the query contains any of the inaction keywords.
        is_inaction_query = any(kw in query_lower for kw in inaction_keywords)
        
        # If the user is NOT asking about inaction, this expert should not run.
        if not is_inaction_query:
            print("DEBUG (Inaction Expert): Query is not about inaction. Skipping this expert.")
            return None
        # --- END OF THE FIX ---
        
        print(f"DEBUG (Inaction Expert): Query IS about inaction. Proceeding with analysis...")

        # --- The rest of your function remains exactly the same ---
        try:
            data_at_event = self.data.loc[self.find_nearest_timestamp(timestamp)]
        except Exception:
            return None

        control_col = self.node_to_column.get(control_action_node)
        control_value = data_at_event.get(control_col, 0)

        # This check is still useful as a secondary confirmation.
        if control_value > self.config['control_active_threshold'].get('default', 1e-6):
            print("DEBUG (Inaction Expert): Control was actually active. Skipping.")
            return None

        states_affected = self.get_states_typically_affected_by_control(control_action_node)
        
        for state_node in states_affected:
            effect_direction = self.get_effect_direction(control_action_node, state_node)
            lg_node = self.lagrangian_nodes.get(state_node, {}).get('ieq')
            lg_col = self.node_to_column.get(lg_node)
            if not lg_col or lg_col not in data_at_event: continue
            
            lg_val = data_at_event.get(lg_col, 0)
            threshold = self.config['lagrangian_active_threshold'].get(lg_node, 1e-7)

            conflict = False
            if effect_direction == 'decrease' and lg_val < -threshold:
                conflict = True
                limit_type = 'LOWER'
                problem_var = state_node
            elif effect_direction == 'increase' and lg_val > threshold:
                conflict = True
                limit_type = 'UPPER'
                problem_var = state_node

            if conflict:
                print(f"SUCCESS (Inaction Expert): Found conflict. '{control_action_node}' was inactive to avoid violating the {problem_var} {limit_type} limit.")
                return {
                    'confidence': 9.9,
                    'primary_reason': f"The controller intentionally kept {self.node_to_name.get(control_action_node)} off to avoid worsening a more critical problem: the {self.node_to_name.get(problem_var)} was at its {limit_type} limit.",
                    'causal_driver_type': "Constraint-Driven Inaction (Trade-off)",
                    'supporting_evidence': {
                        "Conflicting Constraint": f"The {self.node_to_name.get(problem_var)} Lagrangian (`{lg_col}`) was strongly active at {lg_val:.2E}, proving that preventing a violation of this state was the top priority.",
                        "Strategic Choice": f"Activating {self.node_to_name.get(control_action_node)} would have {effect_direction}d the {self.node_to_name.get(problem_var)}, violating a critical safety constraint. Therefore, keeping it off was the optimal decision."
                    }
                }
        return None
    
    def _check_for_flawed_strategy_v2(self, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (META EXPERT V2 - GENERALIZED)
        Identifies situations where the controller is executing a strategy that is
        contradicted by the current environmental conditions, indicating a model mismatch,
        sensor lag, or an otherwise illogical action.
        """
        print("DEBUG (Flawed Strategy Expert): Checking for contradictions...")
        
        try:
            data_row = self.data.loc[self.find_nearest_timestamp(timestamp)]
        except Exception:
            return None

        # --- Gather all potentially relevant data points once ---
        bieq_lg = data_row.get(self.node_to_column.get('B_ieq'))
        co2_setpoint = data_row.get(self.node_to_column.get('C_ref'))
        qrad = data_row.get(self.node_to_column.get('Qrad'))
        t_in = data_row.get(self.node_to_column.get('T'))
        t_out = data_row.get(self.node_to_column.get('Tout'))
        h_in = data_row.get(self.node_to_column.get('H'))
        h_out = data_row.get(self.node_to_column.get('Hout'))
        STRONG_ACTION_THRESHOLD = 0.1 
        is_venting_active = data_row.get(self.node_to_column.get('uV'), 0) > STRONG_ACTION_THRESHOLD
        is_cooling_active = data_row.get(self.node_to_column.get('uQc'), 0) > STRONG_ACTION_THRESHOLD
        is_heating_active = data_row.get(self.node_to_column.get('uQh'), 0) > STRONG_ACTION_THRESHOLD

        lagrangian_values = {
        'T': abs(data_row.get(self.node_to_column.get('T_eq'), 0)),
        'H': abs(data_row.get(self.node_to_column.get('H_eq'), 0)),
        'C': abs(data_row.get(self.node_to_column.get('C_eq'), 0))
        }
        if not any(v > 1e-6 for v in lagrangian_values.values()):
            return None
        primary_goal_node = max(lagrangian_values, key=lambda k: lagrangian_values[k])
        print(f"DEBUG (Flawed Strategy): Determined primary goal is '{primary_goal_node}' with effort {lagrangian_values[primary_goal_node]:.2E}")
        # --- Now, check for contradictions against ONLY the primary goal ---
        is_venting_active = data_row.get(self.node_to_column.get('uV'), 0) > 1e-5

        # ==============================================================================
        # --- SCENARIO 1: "Growth Strategy without Light" ---
        # Goal: Maximize growth. Action: High CO2. Reality: No sun.
        # ==============================================================================
        if 10 <= timestamp.hour < 16:
            is_goal_growth = (pd.notna(bieq_lg) and bieq_lg < -1e-9) and \
                            (pd.notna(co2_setpoint) and co2_setpoint > 800)
            
            if is_goal_growth:
                if pd.notna(qrad) and qrad < 50:
                    return {
                        'confidence': 9.3,
                        'primary_reason': "The controller was attempting a growth strategy that was physically impossible due to a sudden loss of sunlight.",
                        'causal_driver_type': "Flawed Strategy (Model/Reality Mismatch)",
                        'supporting_evidence': {
                            "Controller's Goal": f"The controller was aiming for aggressive growth, proven by the high CO₂ setpoint ({co2_setpoint:.1f} ppm) and the profit-driven signal from the Biomass Lagrangian ({bieq_lg:.2E}).",
                            "Contradictory Reality": f"However, this strategy was ineffective because there was no usable solar radiation (Iout was {qrad:.1f} W/m²).",
                            "Diagnosis": "This indicates a lag in the controller's reaction to the sudden weather change or a flaw in its predictive model, causing it to waste resources on CO₂ that the plants could not use."
                        }
                    }

        # ==============================================================================
        # --- SCENARIO 2: "Cooling with Hotter Air" ---
        # Goal: Lower temperature. Action: Ventilation. Reality: Hotter outside.
        # ==============================================================================
        if is_venting_active and primary_goal_node == 'T':
            teq_lg = data_row.get(self.node_to_column.get('T_eq'))
            is_goal_cooling = pd.notna(teq_lg) and teq_lg > 0.01
            
            if is_goal_cooling:
                t_in = data_row.get(self.node_to_column.get('T'))
                t_out = data_row.get(self.node_to_column.get('Tout'))
                if pd.notna(t_in) and pd.notna(t_out) and t_out > t_in:
                    return {
                        'confidence': 9.4, # This is a very clear physical contradiction
                        'primary_reason': "The controller activated ventilation to cool the greenhouse, but this action was counter-productive because the outside air was hotter.",
                        'causal_driver_type': "Flawed Strategy (Counter-Productive Action)",
                        'supporting_evidence': {
                            "Controller's Goal": f"The controller's clear objective was to lower the temperature, as shown by the positive Temperature Equality Lagrangian ({teq_lg:.2E}).",
                            "Contradictory Reality": f"However, the chosen action (ventilation) was physically flawed, as the outside air ({t_out:.1f}°C) was warmer than the inside air ({t_in:.1f}°C), causing the vents to introduce heat instead of removing it.",
                            "Diagnosis": "This points to a significant flaw, potentially a faulty outside temperature sensor or a critical error in the controller's physical model."
                        }
                    }

        # ==============================================================================
        # --- SCENARIO 3: "Dehumidifying with Wetter Air" ---
        # Goal: Lower humidity. Action: Ventilation. Reality: More humid outside.
        # ==============================================================================
        if is_venting_active and primary_goal_node == 'H':
            heq_lg = data_row.get(self.node_to_column.get('H_eq'))
            is_goal_dehumidify = pd.notna(heq_lg) and heq_lg > 1e-5
            
            if is_goal_dehumidify:
                h_in = data_row.get(self.node_to_column.get('H'))
                h_out = data_row.get(self.node_to_column.get('Hout'))
                if pd.notna(h_in) and pd.notna(h_out) and h_out > h_in:
                    return {
                        'confidence': 9.2,
                        'primary_reason': "The controller activated ventilation to dehumidify, but this was counter-productive as the outside air was more humid.",
                        'causal_driver_type': "Flawed Strategy (Counter-Productive Action)",
                        'supporting_evidence': {
                            "Controller's Goal": f"The controller's objective was to lower humidity, proven by the positive Humidity Equality Lagrangian ({heq_lg:.2E}).",
                            "Contradictory Reality": f"The chosen action (ventilation) was ineffective, as the outside air ({h_out:.1f}%) was more humid than the inside air ({h_in:.1f}%), causing the vents to introduce moisture.",
                            "Diagnosis": "This suggests a possible fault in the outside humidity sensor or an error in the controller's state estimation."
                        }
                    }

        # ==============================================================================
        # --- SCENARIO 4: "Fighting Itself" (Simultaneous Heating and Cooling) ---
        # Goal: Unclear. Action: Active Heating AND Active Cooling. Reality: Massive energy waste.
        # ==============================================================================
        # Note: We exclude the "morning kickstart" which uses VENTILATION, not active cooling.
        is_heating_active = data_row.get(self.node_to_column.get('uQh'), 0) > 1e-5
        
        if is_heating_active and is_cooling_active:
            return {
                'confidence': 10.0, # This is almost certainly a critical system fault
                'primary_reason': "A critical fault occurred, causing the system to activate both heating and cooling simultaneously, resulting in massive energy waste.",
                'causal_driver_type': "Critical Fault (Opposing Actuators)",
                'supporting_evidence': {
                    "Contradictory Actions": "The data shows that both the primary heating system and the active cooling system were running at the same time.",
                    "Physical Consequence": "These actions directly cancel each other out, leading to no effective climate control while consuming maximum energy.",
                    "Diagnosis": "This is a sign of a severe software fault, a stuck relay, or a major sensor failure causing the optimizer to issue conflicting commands. This requires immediate technical intervention."
                }
            }
            
        # ==============================================================================
        # --- SCENARIO 5: "Contradictory Control" (e.g., Cooling while worried about being too cold) ---
        # Goal: Prevent low temp. Action: Massive Cooling. Reality: Illogical.
        # ==============================================================================
        # This checks for a significant control action that directly opposes the optimizer's stated goal.
        
        # Check for cooling action
        if is_cooling_active:
            tieq_lg = data_row.get(self.node_to_column.get('T_ieq'))
            # If the optimizer is worried about being too COLD (negative Tieq_lg)
            # while the system is actively COOLING, it's a major contradiction.
            if pd.notna(tieq_lg) and tieq_lg < -1e-10: # Significantly negative
                return {
                    'confidence': 9.5, # Very high confidence, indicates a probable fault or severe mis-tuning.
                    'primary_reason': "The controller was executing a contradictory and inefficient strategy, actively cooling the greenhouse while its internal optimizer state indicated a concern for the temperature being too low.",
                    'causal_driver_type': "Flawed Strategy (Contradictory Control)",
                    'supporting_evidence': {
                        "Contradictory Goal": f"The Temperature Lagrangian (Tieq_lg) was negative ({tieq_lg:.2E}), which is the optimizer's signal to prevent the temperature from dropping below its lower limit.",
                        "Opposing Action": "Simultaneously, the controller was commanding a strong cooling action, which physically works against its own stated goal.",
                        "Diagnosis": "This behavior is highly anomalous and suggests a severe issue, such as a fault in the state estimation, a critical model inaccuracy, or extreme parameter mis-tuning. The controller is effectively fighting itself, leading to the observed overshoot and inefficient, jagged control."
                    }
                }
            
            return None

    def _check_for_morning_kickstart_v2(self, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (V2 - META EXPERT)
        Specifically looks for the counter-intuitive "heating with ventilation" pattern
        used for the morning warm-up. This is a high-priority, multi-variable pattern.
        """
        # This pattern only occurs in the early morning.
        if not (3 <= timestamp.hour < 10):
            return None

        print("DEBUG (Morning Kickstart Expert v3): Checking for 'free heating' maneuver...")
        
        try:
            data_row = self.data.loc[self.find_nearest_timestamp(timestamp)]
        except Exception:
            return None

        # --- Step 1: Check for the core physical signature ---
        
        # <<< THE FINAL FIX IS HERE >>>
        # Check if the ABSOLUTE value is significantly non-zero.
        # This correctly handles both positive and negative control reference signals.
        is_heating_active = abs(data_row.get(self.node_to_column.get('uQh'), 0)) > 1e-10
        is_venting_active = abs(data_row.get(self.node_to_column.get('uV'), 0)) > 1e-10
        # <<< END OF FIX >>>

        # B. Is there a clear "free heating" opportunity?
        t_in = data_row.get(self.node_to_column.get('T'))
        t_out = data_row.get(self.node_to_column.get('Tout'))
        is_opportunity = pd.notna(t_in) and pd.notna(t_out) and t_out > t_in

        # If the core signature is not present, this hypothesis is invalid.
        if not (is_heating_active and is_venting_active and is_opportunity):
            return None

        # --- Step 2: Calculate Confidence Score ---
        # (The rest of the function remains the same)
        temp_difference = t_out - t_in
        opportunity_score = 6.0 + min(3.0, temp_difference * 1.5)
        
        alignment_bonus = 0.0
        teq_lg = data_row.get(self.node_to_column.get('T_eq'))
        if pd.notna(teq_lg) and teq_lg < -0.01:
            alignment_bonus = min(2.0, abs(teq_lg) * 20)
            
        confidence_score = opportunity_score + alignment_bonus
        
        print(f"SUCCESS (Morning Kickstart Expert v3): Identified strategy with score {confidence_score:.2f} (Opportunity: {opportunity_score:.2f}, Alignment: {alignment_bonus:.2f})")
        
        return {
            'confidence': confidence_score,
            'primary_reason': "This seemingly wasteful action was a highly advanced energy-saving strategy to 'kickstart' the greenhouse for the morning.",
            'causal_driver_type': "Coordinated Action (Free Heating Maneuver)",
            'supporting_evidence': {
                'The Goal': f"The controller's objective was to rapidly warm the greenhouse for the day. The optimizer's effort signal (Teq_lg) was {teq_lg:.3f}, indicating a need to raise the temperature.",
                'The Opportunity': f"It identified that the outside air ({t_out:.1f}°C) was warmer than the inside air ({t_in:.1f}°C), providing a source of 'free' heat.",
                'The Coordinated Action': "Therefore, it executed a multi-part strategy: using ventilation to import the free heat from outside while simultaneously using the mechanical heater as a supplement to achieve the warm-up goal as quickly and efficiently as possible."
            }
        }


    def _find_causal_setpoint_change(self, control_action_node: str, timestamp: datetime,original_query: str) -> Dict[str, Any] | None:
        """
        (FINAL, ROBUST VERSION)
        Finds the most significant setpoint change in a lookback window that could have
        caused the control action. This is more robust than checking just the last time step.
        """
        states_affected = self.get_states_typically_affected_by_control(control_action_node)
        print(f"DEBUG (Setpoint Check): Action '{control_action_node}' affects states: {states_affected}")
        
        # SPECIAL CASE: For ventilation, ensure we check humidity setpoint changes
        if control_action_node == 'uV' and 'H' not in states_affected:
            states_affected.append('H')
            print(f"DEBUG (Setpoint Check): Added 'H' to states_affected for ventilation")

        potential_drivers = []
        
        try:
            # Get the integer position of the event timestamp for robust slicing
            event_idx = self.data.index.get_indexer([timestamp], method='nearest')[0]
        except Exception as e:
            print(f"WARN (Setpoint Check): Error finding index for timestamp {timestamp}: {e}")
            return None

        # Define the lookback window in terms of number of data points (e.g., 2 hours = 24 steps)
        lookback_steps = 24 
        start_idx = max(0, event_idx - lookback_steps)
        end_idx = event_idx + 1 # .iloc is exclusive at the end

        if start_idx >= end_idx - 1:
            return None # Not enough data

        for state_node in states_affected:
            ref_col = self.node_to_column.get(f"{state_node}_ref")
            if not ref_col or ref_col not in self.data.columns:
                print(f"DEBUG (Setpoint Check): No ref_col found for {state_node} (looked for {state_node}_ref -> {ref_col})")
                continue

            # Use robust integer-based slicing
            window_series = self.data[ref_col].iloc[start_idx:end_idx]
            
            if len(window_series) < 2: 
                print(f"DEBUG (Setpoint Check): Not enough data in window for {state_node}")
                continue

            # Check for a significant change *within* the window
            abs_change = abs(window_series.max() - window_series.min())
            
            # Use a specific, defined threshold from config
            threshold = self.config['setpoint_change_threshold'].get(state_node, 1.0)
            
            print(f"DEBUG (Setpoint Check): For '{state_node}', change in window is {abs_change:.2f} (min: {window_series.min():.2f}, max: {window_series.max():.2f}). Threshold is {threshold:.2f}.")

            if abs_change > threshold:
                potential_drivers.append({
                    'state_node': state_node,
                    'significance': abs_change,
                    'value_before': window_series.iloc[0], # Value at start of window
                    'value_after': window_series.iloc[-1], # Value at end of window
                })
                
        if not potential_drivers:
            print("DEBUG (Setpoint Check): No significant setpoint changes found.")
            return None

        # Choose the setpoint change with the largest absolute magnitude
        best_driver = max(potential_drivers, key=lambda x: x['significance'])
        
        state_node_changed = best_driver['state_node']

        # We care about the overall trend in the window, not just start/end points
        setpoint_change_trend = best_driver['value_after'] - best_driver['value_before']
        
        # 1. What physical effect does the control action have on the state whose setpoint changed?
        #    e.g., get_effect_direction('uQc', 'T') -> 'decrease'
        effect_direction = self.get_effect_direction(control_action_node, state_node_changed)

        # 2. Does the control action make physical sense as a response to the setpoint change?
        is_logical_action = False
        
        # <<< --- START OF THE FIX --- >>>
        if setpoint_change_trend < 0: # The goal is to DECREASE the state
            # Is the action's effect 'decrease' OR 'bidirectional'?
            # A bidirectional tool can be used for decreasing.
            if effect_direction in ['decrease', 'bidirectional']:
                is_logical_action = True
        
        elif setpoint_change_trend > 0: # The goal is to INCREASE the state
             # Is the action's effect 'increase' OR 'bidirectional'?
            if effect_direction in ['increase', 'bidirectional']:
                is_logical_action = True
        
        if is_logical_action:
            print(f"SUCCESS (Setpoint Check - V8): Found LOGICAL driver: '{state_node_changed}'.")
            
            state_node_changed = best_driver['state_node']
            confidence = 8.5 + (best_driver['significance'] / 5.0)

            # --- NEW: Hypothesis Confirmation Module ---
            hypothesis_confirmation = ""
            query_lower = original_query.lower()

            # Check if the user's hypothesis matches what we found.
            # This is the "disease prevention" hypothesis.
            if (state_node_changed == 'H' and 
                best_driver['value_after'] < best_driver['value_before'] and 
                (timestamp.hour >= 17 or timestamp.hour < 4) and 
                ('disease' in query_lower or 'botrytis' in query_lower or 'overnight' in query_lower)):
                
                hypothesis_confirmation = ("This action directly confirms your hypothesis. Lowering humidity after sunset is a critical, "
                                        "pre-programmed strategy to prevent overnight condensation on the plants and mitigate the risk of fungal diseases like botrytis.")
            evidence = {
            'The Command': f"The reference for {self.node_to_name.get(state_node_changed)} was changed significantly from ~{best_driver['value_before']:.1f} to ~{best_driver['value_after']:.1f} just before this action."
            }
            
            # If we generated a confirmation, add it to the evidence.
            if hypothesis_confirmation:
                evidence['User Hypothesis Confirmation'] = hypothesis_confirmation
            
            return {
                'confidence': min(confidence, 9.8),
                'primary_reason': f"Yes, the action was a direct response to a command to change the {self.node_to_name.get(state_node_changed)} setpoint.",
                'causal_driver_type': "Setpoint Tracking (Direct Command)",
                'supporting_evidence': evidence
            }
        else:
            print(f"DEBUG (Setpoint Check - V8): Found a setpoint change for '{state_node_changed}', but the control action '{control_action_node}' is NOT a logical response. Ignoring this hypothesis.")
            return None


    def _find_preceding_causal_event(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any] | None:
        """
        Uses PCMCI to find the most likely preceding causal event.
        It looks back at different time lags for different potential causes.
        """
        print(f"DEBUG (Causal Search): Looking for preceding causal events for '{control_action_node}'.")
        if not self.pcmci_results:
            return None

        potential_causes = {}

        # 1. Find all variables that PCMCI identifies as causes for this control action
        control_pcmci_node = self.get_pcmci_node_for_control(control_action_node)
        if not control_pcmci_node: return None
            
        links = self.get_pcmci_incoming_links(control_pcmci_node) # Needs a small helper, see below

        # 2. For each potential cause, check its behavior at the correct lagged time
        for cause_node, info in links.items():
            cause_name = self.node_to_name.get(cause_node, cause_node)
            cause_col = self.node_to_column.get(cause_node)
            lag_minutes = info['lag_minutes']
            strength = info['strength']

            if not cause_col: continue

            # Calculate the time in the past to look for the cause
            causal_timestamp = timestamp - pd.Timedelta(minutes=lag_minutes)
            
            # Check the trend of the cause around its own causal time
            # We look at the change in the cause leading up to the causal_timestamp
            window_slice = slice(causal_timestamp - pd.Timedelta(minutes=15), causal_timestamp)
            cause_series = self.data.loc[window_slice, cause_col]
            
            if len(cause_series) > 1:
                cause_change = cause_series.iloc[-1] - cause_series.iloc[0]
                
                # If there was a significant change in the cause at the right time...
                # (Using a simple absolute threshold for now)
                if abs(cause_change) > self.config.get('causal_event_threshold', 0.1):
                    trend = "increased" if cause_change > 0 else "decreased"
                    # We've found a plausible causal event!
                    potential_causes[cause_node] = {
                        'name': cause_name,
                        'trend': trend,
                        'change': cause_change,
                        'lag_minutes': lag_minutes,
                        'strength': strength
                    }

        # 3. Choose the best explanation from the potential causes
        if not potential_causes:
            return None

        # Heuristic: The cause with the strongest causal link (highest abs strength) is the most likely primary driver.
        best_cause_node = max(potential_causes, key=lambda k: abs(potential_causes[k]['strength']))
        best_cause_info = potential_causes[best_cause_node]

        # --- SCORING LOGIC ---
        # The confidence is based on the absolute strength of the PCMCI causal link.
        # The `val_matrix` in PCMCI gives a partial correlation coefficient, typically between -1 and 1.
        # We can map this strength to our 0-10 confidence scale.
        causal_strength = abs(best_cause_info['strength'])
        
        # A simple linear scaling: strength of 0.1 -> score 1, strength of 0.5 -> score 5.
        # This is a good fallback explanation, so its scores are in the lower-medium range.
        confidence_score = 1.0 + (causal_strength * 4.0) # Maps strength 0-1 to a score of 1-5
        confidence_score = min(confidence_score, 3.5) # Hard cap at 3.5

        return {
            'confidence': confidence_score,
            'primary_reason': f"The action was likely a response to a preceding event: the {best_cause_info['name']} had {best_cause_info['trend']}.",
            'causal_driver_type': "Causal Event Response",
            'supporting_evidence': {
                'Causal Link': f"Historical data shows that a change in **{best_cause_info['name']}** typically affects this control action about **{best_cause_info['lag_minutes']} minutes later** (Causal Strength: {best_cause_info['strength']:.2f}).",
                'Observed Cause': f"At the causally relevant time (around {timestamp - pd.Timedelta(minutes=best_cause_info['lag_minutes']):%H:%M}), the {best_cause_info['name']} was observed to {best_cause_info['trend']}."
            }
        }

    def get_pcmci_incoming_links(self, target_node: str) -> Dict:
        """Collect the significant incoming PCMCI links of ``target_node``.

        Every lag from 1 upward is scanned for entries of the stored graph, in the
        target's row, whose string form contains the '-->' marker. When a source
        appears at several lags, only the link with the largest absolute strength
        is kept.

        Returns:
            A dict mapping each source variable name to
            ``{'lag_minutes': lag * 5, 'strength': value}``. It is empty when there
            are no PCMCI results or the target is not a known PCMCI variable.
        """
        incoming = {}
        if not self.pcmci_results:
            return incoming

        row = self.pcmci_var_to_idx.get(target_node)
        if row is None:
            return incoming

        names = self.pcmci_results['var_names']
        graph = self.pcmci_results['graph']
        strengths = self.pcmci_results['val_matrix']

        # Lag 0 is skipped on purpose: only lagged links are considered.
        for lag in range(1, len(graph)):
            lag_graph = graph[lag]
            for col, source in enumerate(names):
                if '-->' not in str(lag_graph[row, col]):
                    continue
                candidate = strengths[lag][row, col]
                known = incoming.get(source)
                if known is not None and not abs(candidate) > abs(known['strength']):
                    continue
                # Each lag step is converted to minutes with a fixed factor of 5.
                incoming[source] = {'lag_minutes': lag * 5, 'strength': candidate}
        return incoming


    def _get_deeper_context_for_action(self, control_action_node: str, timestamp: datetime) -> Dict[str, str]:
        """
        Build additional explanatory context for a control action at a given time.

        The data row nearest to ``timestamp`` is compared against outside
        conditions to produce action-specific notes (heating 'uQh', cooling 'uQc',
        ventilation 'uV', CO2 injection 'uC'). Independently of the action, the
        inequality Lagrangians of the affected states are averaged over the
        preceding six hours to flag a long-standing constraint issue.

        Returns:
            A dict of label -> sentence; empty when no nearest timestamp exists or
            nothing noteworthy was found.
        """
        row_key = self.find_nearest_timestamp(timestamp)
        if row_key is None:
            return {}

        context = {}
        snapshot = self.data.loc[row_key]

        def read(node, *fallback):
            # Value of `node` in the snapshot row (optional fallback when absent).
            return snapshot.get(self.node_to_column.get(node), *fallback)

        def both_present(first, second):
            # True only when neither reading is missing.
            return pd.notna(first) and pd.notna(second)

        # --- Inside state and outside disturbances ---
        inside_temp = read('T')
        inside_hum = read('H')
        outside_temp = read('Tout')
        outside_hum = read('Hout')
        solar = read('Qrad')

        if control_action_node == 'uQh':
            # --- 1. HEATING ---
            pressures = []
            if both_present(inside_temp, outside_temp) and outside_temp < inside_temp:
                pressures.append(f"the cold outside air ({outside_temp:.1f}°C)")
            if pd.notna(solar) and solar < 50: # Low solar radiation
                pressures.append("a lack of solar heating from the sun")
            if pressures:
                context['Environmental Pressure'] = f"The heating was necessary to counteract heat loss caused by {' and '.join(pressures)}."

        elif control_action_node == 'uQc':
            # --- 2. COOLING ---
            heat_sources = []
            if pd.notna(solar) and solar > self.reasoning_thresholds.get('Qrad_high', 400):
                heat_sources.append(f"intense solar radiation ({solar:.1f} W/m²)")
            if both_present(inside_temp, outside_temp) and outside_temp > inside_temp:
                heat_sources.append(f"high outside temperatures ({outside_temp:.1f}°C)")
            if heat_sources:
                context['Environmental Pressure'] = f"The intense cooling was necessary to fight a heavy heat load caused by {' and '.join(heat_sources)}."

            # Economic trade-off: would the cheaper ventilation have helped?
            vent_would_warm = both_present(inside_temp, outside_temp) and outside_temp >= inside_temp
            vent_would_moisten = both_present(inside_hum, outside_hum) and outside_hum >= inside_hum
            if vent_would_warm:
                if vent_would_moisten:
                    context['Alternative Action Avoided'] = "Costly active cooling was required because ventilation would have been ineffective, as the outside air was both too warm and too humid."
                else:
                    context['Alternative Action Avoided'] = "Costly active cooling was required because ventilation would have made the temperature problem worse."

        elif control_action_node == 'uV':
            # --- 3. VENTILATION ---
            context['Economic Choice'] = "Ventilation is the most energy-efficient (lowest cost) method for climate control."

            purposes = []
            if both_present(inside_temp, outside_temp) and outside_temp < inside_temp:
                purposes.append("cooling (since the outside air was cooler)")
            if both_present(inside_hum, outside_hum) and outside_hum < inside_hum:
                purposes.append("dehumidification (since the outside air was drier)")
            if purposes:
                context['Favorable Conditions'] = f"The outside conditions were favorable for using ventilation for {' and '.join(purposes)}."

        elif control_action_node == 'uC':
            # --- 4. CO2 INJECTION ---
            vents_open = read('uV', 0) > 0.01
            if vents_open:
                context['Economic Trade-off'] = "The system injected CO₂ even while the vents were open, indicating that the need for CO₂ for plant growth was considered more important than the cost of losing some of it to the outside."
            else:
                context['Economic Choice'] = "CO₂ was injected while the vents were closed to maximize its efficiency and minimize waste."

        # --- 5. UNIVERSAL CHECK: long-standing issue (applies to every action) ---
        affected = self.get_states_typically_affected_by_control(control_action_node)
        six_hours_back = slice(timestamp - pd.Timedelta(hours=6), timestamp)

        for state in affected:
            multiplier_node = self.lagrangian_nodes.get(state, {}).get('ieq')
            if not multiplier_node:
                continue
            column = self.node_to_column.get(multiplier_node)
            if not column or column not in self.data.columns:
                continue
            try:
                history = self.data.loc[six_hours_back, column]
                if history.empty:
                    continue
                mean_value = history.mean()
                limits = self.config.get('lagrangian_active_threshold', {})
                cutoff = limits.get(multiplier_node, limits.get('default', 1e-7))
                if pd.isna(mean_value) or not abs(mean_value) > cutoff:
                    continue
                label = self.node_to_name.get(state, state)
                side = "upper" if mean_value > 0 else "lower"
                # Only the first qualifying state is reported.
                if 'Long-standing Issue' not in context:
                    context['Long-standing Issue'] = f"This action appears to address a persistent issue, as the **{label}** had been consistently pushing against its **{side} limit** for several hours."
            except Exception as e:
                print(f"WARN - Error analyzing long-standing issue for {state}: {e}")

        return context

    def _check_for_active_STATE_constraint_driver(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (DEFINITIVE, SIMPLIFIED VERSION)
        This final version relies on the corrected system knowledge and uses a clean,
        standard logic path to correctly identify all state constraint violations.
        """
        print("DEBUG (State Constraint Check): Starting...")
        states_affected = self.get_states_typically_affected_by_control(control_action_node)
        active_state_constraints = []

        start_window = timestamp - pd.Timedelta(minutes=5)
        end_window = timestamp + pd.Timedelta(minutes=5)

        for state_node in states_affected:
            lg_node = self.lagrangian_nodes.get(state_node, {}).get('ieq')
            if not lg_node: continue
            
            lg_col = self.node_to_column.get(lg_node)
            if not (lg_col and lg_col in self.data.columns): continue
                
            try:
                window_slice = self.data.loc[start_window:end_window, lg_col]
                if window_slice.empty: continue
                peak_lg_val = window_slice.iloc[window_slice.abs().argmax()]
            except (TypeError, ValueError, KeyError):
                continue
                
            if pd.isna(peak_lg_val): continue

            threshold = self.config['lagrangian_active_threshold'].get(lg_node, 1e-7)
            
            print(f"DEBUG (State Constraint Check): Checking {lg_node} for {state_node}. Peak Value in window: {peak_lg_val:.3E}, Threshold: {threshold:.3E}")

            # Get state value at this time for proper Lagrangian interpretation
            state_col = self.node_to_column.get(state_node)
            if state_col and state_col in self.data.columns:
                try:
                    state_value = self.data.loc[timestamp, state_col]
                    # Round to 2 decimal places for Lagrangian calculation
                    state_value = round(state_value, 2)
                    
                    # Use the CORRECTED Lagrangian interpretation for HARD constraints
                    hard_interp = self.interpret_hard_constraint_lagrangian(state_node, state_value, peak_lg_val)
                    
                    print(f"DEBUG (Lagrangian Interpretation): {state_node} = {state_value:.2f}, λ = {peak_lg_val:.3E}")
                    print(f"  → Status: {hard_interp['status']}, Confidence: {hard_interp['confidence']}")
                    print(f"  → {hard_interp['interpretation']}")
                    
                    if hard_interp['is_active']:
                        active_state_constraints.append({
                            'node': state_node, 
                            'lg_val': peak_lg_val,
                            'state_value': state_value,
                            'hard_interpretation': hard_interp
                        })
                except (KeyError, ValueError):
                    # Fallback to old logic if state value not available
                    if abs(peak_lg_val) > threshold:
                        active_state_constraints.append({'node': state_node, 'lg_val': peak_lg_val})
            else:
                # Fallback to old logic if state column not found
                if abs(peak_lg_val) > threshold:
                    active_state_constraints.append({'node': state_node, 'lg_val': peak_lg_val})

        if not active_state_constraints:
            print("DEBUG (State Constraint Check): No active HARD constraints found in window.")
            
            # ===== NEW: CHECK SOFT CONSTRAINTS =====
            print("DEBUG (Soft Constraint Check): Checking soft constraints since no hard constraints active...")
            
            # Check soft constraints for all affected states
            soft_violations = []
            for state_node in states_affected:
                state_col = self.node_to_column.get(state_node)
                if not (state_col and state_col in self.data.columns):
                    continue
                    
                try:
                    state_value = self.data.loc[timestamp, state_col]
                    if pd.isna(state_value):
                        continue
                    
                    # Round to 2 decimal places for soft constraint penalty calculation
                    state_value = round(state_value, 2)
                    
                    # Calculate soft constraint penalty
                    penalty_result = self.calculate_soft_constraint_penalty(state_node, state_value)
                    
                    print(f"DEBUG (Soft Constraint): {state_node} = {state_value:.2f}")
                    print(f"  Penalty: {penalty_result['penalty_value']:.6f} (ratio: {penalty_result['penalty_ratio']*100:.1f}%)")
                    print(f"  Status: {penalty_result['status']}, Active: {penalty_result['is_penalty_active']}")
                    
                    if penalty_result['is_penalty_active']:
                        soft_violations.append({
                            'node': state_node,
                            'state_value': state_value,
                            'penalty_result': penalty_result
                        })
                        
                except (KeyError, ValueError) as e:
                    print(f"DEBUG (Soft Constraint): Error checking {state_node}: {e}")
                    continue
            
            # If we found soft constraint violations, return the most severe one
            if soft_violations:
                # Sort by penalty ratio (higher = more severe)
                best_violation = max(soft_violations, key=lambda x: x['penalty_result']['penalty_ratio'])
                
                state_node = best_violation['node']
                penalty_info = best_violation['penalty_result']
                var_name = self.node_to_name.get(state_node, state_node)
                
                # Check if this control action makes sense for this violation
                violation_type = penalty_info['violation_type']
                direction = 'decrease' if violation_type == 'upper' else 'increase'
                corrective_actions = self.kg.get_corrective_actions_for_state(state_node, direction)
                
                if control_action_node not in corrective_actions:
                    print(f"DEBUG (Soft Constraint): Violation found but action '{control_action_node}' not logical for {state_node} {direction}")
                    return None
                
                print(f"DEBUG (Soft Constraint): WINNER - {state_node} soft constraint violation (severity: {penalty_info['severity']})")
                
                # Score based on penalty severity
                severity_scores = {'severe': 7.5, 'moderate': 6.5, 'mild': 5.5}
                confidence_score = severity_scores.get(penalty_info['severity'], 5.0)
                
                return {
                    'confidence': confidence_score,
                    'primary_reason': f"The controller acted to prevent {var_name} from exceeding its SOFT constraint bounds, minimizing cost penalties.",
                    'causal_driver_type': "Soft Constraint Management (Cost Minimization)",
                    'supporting_evidence': {
                        'Soft_Constraint_Penalty': f"The soft constraint penalty for {var_name} was {penalty_info['penalty_value']:.6f}, which is {penalty_info['penalty_ratio']*100:.0f}% of the maximum penalty weight ({penalty_info['penalty_weight']:.2f}).",
                        'Soft_Constraint_Bounds': f"{var_name} SOFT constraints: [{penalty_info['soft_bounds']['min']}, {penalty_info['soft_bounds']['max']}]",
                        'State_Value': f"{var_name} was {penalty_info['state_value']:.2f}, violating the {penalty_info['violation_type']} soft bound",
                        'Interpretation': penalty_info['interpretation'],
                        'Severity': f"Violation severity: {penalty_info['severity']}"
                    }
                }
            
            print("DEBUG (Soft Constraint Check): No soft constraint violations found either.")
            return None

        # CRITICAL FIX: Filter constraints to only those where the control action makes logical sense
        # BEFORE selecting the "best" one based on magnitude
        logical_constraints = []
        for constraint in active_state_constraints:
            test_state = constraint['node']
            test_lg_val = constraint['lg_val']
            test_limit_type = 'UPPER' if test_lg_val > 0 else 'LOWER'
            test_direction = 'decrease' if test_limit_type == 'UPPER' else 'increase'
            test_corrective_actions = self.kg.get_corrective_actions_for_state(test_state, test_direction)
            
            if control_action_node in test_corrective_actions:
                logical_constraints.append(constraint)
                print(f"DEBUG (State Constraint): '{test_state}' constraint IS logical for action '{control_action_node}'")
            else:
                print(f"DEBUG (State Constraint): '{test_state}' constraint is NOT logical for action '{control_action_node}' (would need {test_corrective_actions})")
        
        if not logical_constraints:
            print("DEBUG (State Constraint Check): No logical constraints found for this control action.")
            return None
        
        # Now select the best among the LOGICAL constraints
        best_constraint = max(logical_constraints, key=lambda x: abs(x['lg_val']))
        
        state_node = best_constraint['node']
        lg_val = best_constraint['lg_val']
        limit_type = 'UPPER' if lg_val > 0 else 'LOWER'
        var_name = self.node_to_name.get(state_node, state_node)
        
        # Get hard constraint interpretation if available
        hard_interp = best_constraint.get('hard_interpretation')
        state_value = best_constraint.get('state_value')

        # --- SCORING LOGIC ---
        # The confidence is directly proportional to how hard the optimizer is pushing.
        # We use a log scale to handle the huge range of possible Lagrangian values.

        lg_magnitude = abs(best_constraint['lg_val'])
        lg_node = self.lagrangian_nodes.get(best_constraint['node'], {}).get('ieq')
        threshold = self.config['lagrangian_active_threshold'].get(lg_node, 1e-7)

        if lg_magnitude > threshold:
            # FIXED: State constraints should have HIGHER priority than setpoint tracking
            # Base score increased from 7.5 to 9.0 to ensure constraint avoidance wins
            orders_of_magnitude = np.log10(lg_magnitude / threshold)
            confidence_score = 9.0 + (0.5 * orders_of_magnitude)  
        else:
            confidence_score = 0

        confidence_score = min(confidence_score, 9.9)  # Increased max from 9.8 to 9.9
        direction = 'decrease' if limit_type == 'UPPER' else 'increase'
        corrective_actions = self.kg.get_corrective_actions_for_state(state_node, direction)
        print(f"DEBUG (State Constraint): WINNER - State '{state_node}', direction '{direction}', corrective actions: {corrective_actions}")

        # This check is now redundant but kept for safety
        if control_action_node in corrective_actions:
            print(f"DEBUG (State Constraint): Logical action confirmed. '{control_action_node}' is a valid action to {direction} {state_node}.")
            
            # Build evidence with CORRECTED hard constraint interpretation
            evidence = {
                'Lagrangian_Multiplier': f"The Lagrangian multiplier for {var_name} was {lg_val:.2E}, indicating the HARD constraint at {limit_type} bound was active."
            }
            
            # Add hard constraint interpretation if available
            if hard_interp and state_value is not None:
                hard_bounds = hard_interp.get('hard_bounds', {})
                evidence['Hard_Constraint_Bounds'] = f"{var_name} HARD constraints: [{hard_bounds.get('min')}, {hard_bounds.get('max')}]"
                evidence['State_Value'] = f"{var_name} was {state_value:.2f}, approaching the {limit_type} hard bound"
                evidence['Interpretation'] = hard_interp.get('interpretation', '')
                
                # Also check soft constraint for additional context
                soft_check = self.check_soft_constraint_violation(state_node, state_value)
                if soft_check and soft_check.get('penalty_active'):
                    soft_bounds = soft_check.get('soft_bounds', {})
                    evidence['Soft_Constraint_Note'] = f"Note: {var_name} was also outside soft bounds [{soft_bounds.get('min')}, {soft_bounds.get('max')}], but the Lagrangian reflects the HARD constraint only."
            
            return {
                'confidence': confidence_score,
                'primary_reason': f"The controller acted defensively to prevent {var_name} from violating its {limit_type} HARD constraint.",
                'causal_driver_type': "State Constraint Avoidance (Defensive Action)",
                'supporting_evidence': evidence
            }
        else:
            print(f"DEBUG (State Constraint): Found active {limit_type} limit for {var_name}, but action '{control_action_node}' is not a logical response. Ignoring.")
            return None

    
    
    def _check_for_setpoint_change_driver(self, control_action_node: str, timestamp: datetime):
        """
        Check whether a jump in a relevant setpoint explains a control action.

        For every state returned by ``get_states_typically_affected_by_control``
        the reference column (node id ``"<state>_ref"``) is looked up and its
        last two samples up to and including ``timestamp`` are compared. The
        first state whose reference changed by more than its configured
        ``setpoint_change_threshold`` (default 5.0) is reported.

        Args:
            control_action_node: Node id of the control action being explained.
            timestamp: Time of the control action; upper bound of the slice taken
                from ``self.data``.

        Returns:
            A dict with the keys 'primary_reason', 'causal_driver_type' and
            'supporting_evidence', or None when no reference moved enough.
        """
        states_affected = self.get_states_typically_affected_by_control(control_action_node)
        print(f"DEBUG (Setpoint Check): Action '{control_action_node}' affects states: {states_affected}")

        change_thresholds = self.config.get('setpoint_change_threshold', {})

        for state_node in states_affected:
            # The column for the reference value is what we need
            ref_col = self.node_to_column.get(f"{state_node}_ref")

            if not ref_col:
                print(f"DEBUG (Setpoint Check): No ref_col found for state '{state_node}' (tried '{state_node}_ref'). Skipping.")
                continue
            if ref_col not in self.data.columns:
                print(f"DEBUG (Setpoint Check): Column '{ref_col}' not in data. Skipping.")
                continue

            # Value at the event and the sample just before it
            ref_series = self.data[ref_col].loc[:timestamp].tail(2)
            if len(ref_series) != 2:
                continue

            previous_ref = ref_series.iloc[0]
            current_ref = ref_series.iloc[-1]
            change = current_ref - previous_ref
            threshold = change_thresholds.get(state_node, 5.0)

            print(f"DEBUG (Setpoint Check): For '{state_node}' (col: {ref_col}), change is {change:.2f}. Threshold is {threshold:.2f}.")

            # A NaN change compares False here, so gaps never count as a change
            if abs(change) > threshold:
                print(f"DEBUG (Setpoint Check): SUCCESS! Found significant setpoint change for '{state_node}'.")
                # Fall back to the node id so the explanation never reads
                # "None" for a state without a display name.
                var_name = self.node_to_name.get(state_node, state_node)
                return {
                    'primary_reason': f"The controller acted to track a new, demanding setpoint for {var_name}.",
                    'causal_driver_type': "Setpoint Tracking (Direct Command)",
                    'supporting_evidence': {
                        'Setpoint Change': f"The reference for {var_name} changed from {previous_ref:.2f} to {current_ref:.2f}."
                    }
                }
        return None

    def _check_for_active_CONTROL_constraint_driver(self, control_action_node: str, timestamp: datetime):
        """
        Check whether the control action was sitting at one of its own limits.

        The inequality Lagrangian of the control itself is read at ``timestamp``
        and compared with its activation threshold (per-node entry of
        ``config['lagrangian_active_threshold']``, then its 'default', then
        1e-7). An active multiplier is only accepted when the most recent change
        of the control does not contradict it: a LOWER limit with an increasing
        action, or an UPPER limit with a decreasing action, is rejected.

        Args:
            control_action_node: Node id of the control action being explained.
            timestamp: Time of the control action; must be a label of
                ``self.data`` for the check to produce a result.

        Returns:
            A dict with the keys 'primary_reason', 'causal_driver_type' and
            'supporting_evidence', or None when the control is not saturated or
            the required data is unavailable.
        """
        lg_node = self.lagrangian_nodes.get(control_action_node, {}).get('ieq')
        if not lg_node:
            return None

        lg_col = self.node_to_column.get(lg_node)
        if not (lg_col and lg_col in self.data.columns):
            return None

        try:
            lg_val = self.data.loc[timestamp, lg_col]
        except KeyError:
            # No row at this exact timestamp: treat as "no evidence" instead of
            # aborting the whole explanation, like the state-constraint check.
            print(f"DEBUG (Control Constraint): No data at {timestamp} for '{lg_col}'. Skipping.")
            return None

        # Get the specific threshold for this control's Lagrangian from the config dictionary
        thresholds_dict = self.config.get('lagrangian_active_threshold', {})
        specific_threshold = thresholds_dict.get(lg_node, thresholds_dict.get('default', 1e-7))

        if pd.isna(lg_val) or abs(lg_val) < specific_threshold:
            return None  # Lagrangian is not active enough

        # The control's own column is needed to validate the direction of the move
        control_col = self.node_to_column.get(control_action_node)
        if not (control_col and control_col in self.data.columns):
            return None

        action_series = self.data[control_col].loc[:timestamp].tail(2)
        action_change = action_series.diff().iloc[-1] if len(action_series) == 2 else 0

        # Positive multiplier -> upper bound active, otherwise lower bound
        limit_type = 'UPPER' if lg_val > 0 else 'LOWER'
        var_name = self.node_to_name.get(control_action_node, control_action_node)
        change_threshold = self.control_thresholds.get('change_threshold', 0.001)

        # If the lower limit is active, the action should not be a significant increase.
        if limit_type == 'LOWER' and action_change > change_threshold:
            print(f"DEBUG (Control Constraint): Ignoring active LOWER limit for '{var_name}' because the action was an INCREASE.")
            return None

        # If the upper limit is active, the action should not be a significant decrease.
        if limit_type == 'UPPER' and action_change < -change_threshold:
            print(f"DEBUG (Control Constraint): Ignoring active UPPER limit for '{var_name}' because the action was a DECREASE.")
            return None

        # If the logic passes, the constraint is a valid reason.
        return {
            'primary_reason': f"The control action '{var_name}' was operating at its {limit_type} limit.",
            'causal_driver_type': "Control Saturation",
            'supporting_evidence': {
                'Active Control Lagrangian': f"The multiplier for {var_name} ({lg_val:.2E}) was active."
            }
        }
    

    def analyze_day_strategy(self, date: datetime.date, mentioned_nodes: list) -> Dict[str, Any]:
        """
        Build a structured summary of one day of controller behaviour.

        The analysis has four parts:

        1. Growth and cost: net change of the biomass column (node 'B') between
           the first and last valid sample of the day, and the summed
           heating/cooling flux columns converted to an energy figure.
        2. Overall strategy: classified from the daytime (06:00-18:00)
           variability and the daily mean of the 'Qrad' column.
        3. Stress risks: for every entry of ``self.stress_signatures`` the
           configured Lagrangian column is compared with its activation
           threshold inside the entry's time window.
        4. Specific questions: extra verdicts when 'T' (and 'C') appear in
           ``mentioned_nodes``.

        Args:
            date: Calendar day to analyse; compared against
                ``self.data.index.date``.
            mentioned_nodes: Node ids extracted from the user's query.

        Returns:
            ``{'error': 'No data for this day.'}`` when the day has no rows,
            otherwise a dict with the keys 'growth_verdict', 'cost_analysis',
            'strategy_explanation', 'all_stress_analyses', 'humidity_strategy'
            and 'specific_analysis'.
        """
        # NOTE(review): with `from datetime import datetime` in scope, the
        # annotation `datetime.date` resolves to the datetime.date() method, not
        # the `date` class. Left untouched to keep the signature identical.
        print(f"DEBUG (Daily Strategy V5.1 - Comprehensive): Analyzing for {date}, focusing on {mentioned_nodes}")

        day_data = self.data[self.data.index.date == date]
        if day_data.empty:
            return {'error': 'No data for this day.'}

        # --- MODULE 1: GROWTH & COST PERFORMANCE ---
        growth_verdict = "Growth could not be determined."
        bio_col = self.node_to_column.get('B')
        bio_change = 0.0
        if bio_col and bio_col in day_data.columns:
            # Use start of day vs end of day for a more robust measure of net gain
            bio_series = day_data[bio_col].dropna()
            if len(bio_series) > 1:
                bio_change = bio_series.iloc[-1] - bio_series.iloc[0]

                if bio_change > 0.15:
                    growth_verdict = f"Yes, it was a very good day for growth, with a net biomass gain of approximately {bio_change:.2f} units."
                elif bio_change > 0.05:
                    growth_verdict = f"It was a decent day for growth, with a modest biomass gain of approximately {bio_change:.2f} units."
                else:
                    growth_verdict = f"No, it was a poor day for growth, with a negligible biomass gain of {bio_change:.2f} units."

        # Cost-benefit analysis
        cost_analysis = {}
        heat_flux_col = self.node_to_column.get('Q_heat')
        cool_flux_col = self.node_to_column.get('Q_cool')
        time_step_seconds = 300.0  # assumes 5-minute samples -- TODO confirm against the data source
        joules_to_megajoules = 1_000_000.0
        total_heating_cost = 0.0
        total_cooling_cost = 0.0

        if heat_flux_col and heat_flux_col in day_data.columns:
            # Only positive heating flux is counted
            heat_flux = day_data[heat_flux_col]
            total_heating_cost = heat_flux[heat_flux > 0].sum() * time_step_seconds / joules_to_megajoules

        if cool_flux_col and cool_flux_col in day_data.columns:
            # Cooling is counted by magnitude, whatever its sign
            total_cooling_cost = day_data[cool_flux_col].abs().sum() * time_step_seconds / joules_to_megajoules

        total_energy_cost = total_heating_cost + total_cooling_cost
        cost_analysis['total_energy_cost_MJ'] = total_energy_cost

        if bio_change > 0 and total_energy_cost > 0:
            growth_per_cost_ratio = bio_change / total_energy_cost
            cost_verdict = f"The day had a cost-effectiveness ratio of {growth_per_cost_ratio:.2f} biomass units per MegaJoule of energy."
        else:
            cost_verdict = "Cost-effectiveness could not be determined."
        cost_analysis['cost_verdict'] = cost_verdict

        # --- MODULE 2: OVERALL STRATEGY (Volatility-Based) ---
        strategy_explanation = "The controller's overall strategy could not be determined."
        iout_col = self.node_to_column.get('Qrad')
        if iout_col and iout_col in day_data.columns:
            # dropna: a day whose daytime radiation readings are all missing must
            # stay "could not be determined" instead of being labelled overcast.
            iout_series = day_data.between_time('06:00', '18:00')[iout_col].dropna()
            if not iout_series.empty:
                mean_iout = iout_series.mean()
                # Coefficient of variation; skipped for near-zero means
                iout_volatility_cv = iout_series.std() / mean_iout if mean_iout > 1 else 0
                is_volatile_day = iout_volatility_cv > 0.4

                if is_volatile_day:
                    strategy_explanation = "The controller detected highly challenging and inconsistent sunlight. Its strategy shifted to a sophisticated **dynamic tracking** mode to maximize growth under difficult conditions."
                else:
                    is_sunny_day = day_data[iout_col].mean() > 150
                    if is_sunny_day:
                        strategy_explanation = "The controller identified excellent and stable growing conditions due to high solar radiation. Its primary strategy was **profit maximization**."
                    else:
                        strategy_explanation = "The controller correctly identified a consistently overcast day. Its primary strategy was **cost minimization**."

        # --- MODULE 3: GENERALIZED STRESS ANALYSIS ---
        all_stress_analyses = []
        # .get() avoids a KeyError when the config has no threshold section; the
        # config-wide 'default' is honoured before this method's 1e-9 fallback.
        thresholds_dict = self.config.get('lagrangian_active_threshold', {})
        default_threshold = thresholds_dict.get('default', 1e-9)

        for stress_name, signature in self.stress_signatures.items():
            state_node = signature['state_node']
            lg_node = signature['lagrangian_node']
            lg_col = self.node_to_column.get(lg_node)
            time_start, time_end = signature['time_window']

            if time_start > time_end:
                # Overnight window: only the evening part of this calendar day is
                # examined; the following morning belongs to the next day.
                relevant_data = day_data.between_time(time_start, '23:59')
            else:
                relevant_data = day_data.between_time(time_start, time_end)

            if relevant_data.empty or not lg_col or lg_col not in relevant_data.columns:
                continue

            limit_type = signature['limit_type']
            lagrangian_series = relevant_data[lg_col]
            threshold = thresholds_dict.get(lg_node, default_threshold)

            # Positive multipliers flag the upper limit, negative ones the lower
            if limit_type == 'UPPER':
                risk_detected = lagrangian_series.max() > threshold
            elif limit_type == 'LOWER':
                risk_detected = lagrangian_series.min() < -threshold
            else:
                risk_detected = False

            if not risk_detected:
                continue

            # Fall back to the node id so the text never reads "None"
            state_name = self.node_to_name.get(state_node, state_node)
            stress_evidence = (
                f"Yes, there was a significant risk of **{stress_name}**. The Lagrangian (`{lg_node}`) "
                f"was active, showing the controller was fighting to keep the {state_name} from violating its {limit_type.lower()} limit."
            )

            # Check if the risk was successfully managed
            # NOTE(review): this compares the reference ("<state>_ref") column,
            # not the measured state, with the hard limit -- confirm intent.
            ref_col = self.node_to_column.get(f"{state_node}_ref")
            hard_limit_key = 'max' if limit_type == 'UPPER' else 'min'
            hard_limit = self.constraint_limits.get(state_node, {}).get(hard_limit_key)

            risk_managed = "partially"
            if ref_col and ref_col in relevant_data.columns and hard_limit is not None:
                if limit_type == 'UPPER' and relevant_data[ref_col].max() < hard_limit:
                    risk_managed = "successfully"
                elif limit_type == 'LOWER' and relevant_data[ref_col].min() > hard_limit:
                    risk_managed = "successfully"

            all_stress_analyses.append({
                'risk_detected': True,
                'risk_managed': risk_managed,
                'stress_type': stress_name,
                'evidence': stress_evidence,
                'mitigation_system': signature['primary_mitigation_system']
            })

        # --- MODULE 4: SPECIFIC QUESTION ANALYSIS ---
        specific_analysis = {}

        # Did the user ask about Temperature ('T')?
        if 'T' in mentioned_nodes:
            temp_col = self.node_to_column.get('T')
            if temp_col and temp_col in day_data.columns:
                daytime_data_temp = day_data.between_time('08:00', '18:00')
                if not daytime_data_temp.empty:
                    max_temp_day = daytime_data_temp[temp_col].max()
                    optimal_range = self.constraint_limits.get('T', {}).get('optimal_range', [18, 26])

                    # Skip the verdict when every reading is missing; otherwise
                    # "nan" would be reported as being inside the optimal range.
                    if pd.notna(max_temp_day):
                        # 1.0 above the upper edge is still reported as optimal
                        if max_temp_day > (optimal_range[1] + 1.0):
                            temp_verdict = (f"Regarding your question, the temperature peaked at {max_temp_day:.1f}°C. This is slightly "
                                            "above the optimal range for photosynthesis, but was well-managed by the cooling system and did not harm growth.")
                        else:
                            temp_verdict = (f"Regarding your question, the temperature peaked at {max_temp_day:.1f}°C, which is perfectly "
                                            "within the optimal range for photosynthesis and was not a limiting factor.")
                        specific_analysis['Temperature and Photosynthesis'] = temp_verdict

        # Interaction between CO2 and Temperature
        if 'C' in mentioned_nodes and 'T' in mentioned_nodes:
            temp_col = self.node_to_column.get('T')
            phot_col = self.node_to_column.get('C_phot')
            if temp_col and phot_col and all(c in day_data.columns for c in [temp_col, phot_col]):
                daytime_data_int = day_data.between_time('08:00', '18:00')
                # dropna first: idxmin on an all-NaN column yields no usable label
                phot_series = daytime_data_int[phot_col].dropna()
                if not phot_series.empty:
                    # The minimum of the photosynthesis column is taken as the
                    # peak -- presumably uptake is stored as a negative flux;
                    # verify against the model's sign convention.
                    peak_photosynthesis_time = phot_series.idxmin()
                    temp_at_peak_phot = daytime_data_int.loc[peak_photosynthesis_time, temp_col]
                    optimal_range = self.constraint_limits.get('T', {}).get('optimal_range', [18, 26])

                    if pd.notna(temp_at_peak_phot):
                        if optimal_range[0] <= temp_at_peak_phot <= optimal_range[1]:
                            interaction_verdict = (f"Yes, the plants were definitely benefiting. The analysis shows that peak photosynthesis occurred when the temperature "
                                                f"was {temp_at_peak_phot:.1f}°C, perfectly within the optimal range. This indicates the high temperatures "
                                                "were not hindering, but were in fact helping to maximize, the benefit of the available CO₂.")
                        else:
                            interaction_verdict = (f"The benefit was likely reduced. Peak photosynthesis occurred at {temp_at_peak_phot:.1f}°C, "
                                                "which is outside the optimal range, suggesting temperature became a limiting factor.")
                        specific_analysis['CO2_and_Temperature_Interaction'] = interaction_verdict

        humidity_strategy = self.analyze_humidity_strategy(date)

        # --- FINAL RETURN DICTIONARY ---
        return {
            'growth_verdict': growth_verdict,
            'cost_analysis': cost_analysis,
            'strategy_explanation': strategy_explanation,
            'all_stress_analyses': all_stress_analyses,
            'humidity_strategy': humidity_strategy,
            'specific_analysis': specific_analysis
        }

    def format_day_strategy_for_llm(self, analysis_result: Dict[str, Any], original_query: str) -> str:
        """
        Build the plain-text context handed to the LLM for a day-strategy question.

        Sections are added in priority order:
          1.  Humidity-vs-disease trade-off: when the query mentions humidity together
              with disease / botrytis / risk, a dedicated answer is returned immediately.
          1a. Overall growth verdict.
          2.  Cost-benefit analysis (when present).
          3.  Specific analyses, titled as supplementary for cost/energy queries.
          4.  Overall controller strategy, only when neither 2 nor 3 is present.
          5.  Stress analyses not already covered by a specific analysis.

        Args:
            analysis_result: Result dictionary of the day-strategy analysis. Keys read
                here: 'error', 'growth_verdict', 'cost_analysis', 'specific_analysis',
                'strategy_explanation', 'all_stress_analyses', 'humidity_strategy'.
            original_query: The user's question; keywords are matched case-insensitively.

        Returns:
            The newline-joined context string, or an error message when
            ``analysis_result`` carries an 'error' key.
        """
        if 'error' in analysis_result:
            return f"Error analyzing day strategy: {analysis_result['error']}"

        query = original_query.lower()
        context = ["--- Daily Strategy & Performance Analysis ---"]

        # --- PRIORITY 1: Check if the query is specifically about the humidity/disease trade-off ---
        # 'humid' also matches 'humidity', so a single substring test is enough.
        mentions_humidity = 'humid' in query
        mentions_disease_risk = any(word in query for word in ('disease', 'botrytis', 'risk'))

        if mentions_humidity and mentions_disease_risk:
            context.append("\n**User Concern:** The user is worried about high daytime humidity and the risk of botrytis.")

            # Explain the growth rationale
            growth_verdict = analysis_result.get('growth_verdict', "No growth data available.")
            context.append(f"\n**Growth Rationale:** The high daytime humidity is a deliberate growth-maximization strategy. {growth_verdict} This is because high humidity allows plants to take in CO₂ for photosynthesis more efficiently.")

            # Explain the mitigation strategy using the day/night analysis.
            # `or {}` keeps the lookups safe if a key is present but holds None.
            humidity_strategy = analysis_result.get('humidity_strategy') or {}
            day_strat = humidity_strategy.get('day_strategy') or {}
            night_strat = humidity_strategy.get('night_strategy') or {}

            mitigation_parts = []
            if day_strat:
                mitigation_parts.append(f"1. **Daytime Management:** During the day, the controller uses the active cooling system as a powerful dehumidifier (avg flux: {day_strat.get('avg_cooling_dehumidification', 0):.4f}) to prevent humidity from becoming excessive.")
            if night_strat:
                mitigation_parts.append(f"2. **Nighttime Prevention:** As the sun sets, the controller switches to a disease prevention mode, forcefully lowering the humidity setpoint to ~{night_strat.get('humidity_setpoint', 0):.0f}%. This is critical for preventing condensation on the plants overnight.")

            if mitigation_parts:
                context.append("\n**Risk Mitigation Strategy:** However, the controller actively manages this risk with a two-part strategy:")
                context.extend(f"- {part}" for part in mitigation_parts)

            return "\n".join(context)

        # --- PRIORITY 1a: Provide the overall growth verdict. ---
        # This always comes first as it's the primary measure of success.
        growth_verdict = analysis_result.get('growth_verdict')
        if growth_verdict:
            context.append(f"\n**Overall Growth Performance:** {growth_verdict}")

        # --- PRIORITY 2: Provide the cost-benefit analysis if present. ---
        cost_analysis = analysis_result.get('cost_analysis')
        if cost_analysis:
            context.append("\n**Cost-Benefit Analysis:**")
            cost_verdict = cost_analysis.get('cost_verdict')
            if cost_verdict:
                context.append(f"- {cost_verdict}")

        # --- PRIORITY 3: Answer any OTHER specific questions the user asked. ---
        specifics = analysis_result.get('specific_analysis') or {}
        if specifics:
            # For a cost/energy query the specifics are supplementary; otherwise they are the main answer.
            is_cost_query = 'cost' in query or 'energy' in query
            title = "Additional Specific Analysis:" if is_cost_query else "Direct Answer to Your Question:"

            context.append(f"\n**{title}**")
            context.extend(f"- {value}" for value in specifics.values())

        # --- PRIORITY 4: Add overall strategy context if no specific question was answered. ---
        if not specifics and not cost_analysis:
            strategy_explanation = analysis_result.get('strategy_explanation')
            if strategy_explanation:
                context.append(f"\n**Controller's Overall Strategy:** {strategy_explanation}")

        # --- PRIORITY 5: Add relevant, non-redundant stress analysis. ---
        all_stresses = analysis_result.get('all_stress_analyses') or []
        already_covered_stresses = []
        if 'Temperature and Photosynthesis' in specifics:
            already_covered_stresses.append('Heat Stress')

        relevant_stresses = [s for s in all_stresses if s.get('stress_type') not in already_covered_stresses]

        if relevant_stresses:
            context.append("\n**Additional Stress Analysis:**")
            for stress in relevant_stresses:
                # The filter above tolerates a missing 'stress_type', so the formatter
                # must too; indexing directly would raise KeyError for such entries.
                stress_label = stress.get('stress_type', 'Unspecified Stress')
                context.append(f"- **{stress_label}:** {stress.get('evidence', 'No details available.')}")

        return "\n".join(context)


    def _check_for_opportunistic_driver(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (REVISED FOR PROFIT-DRIVEN ANALYSIS - V4 with tolerant conditions)
        Checks for profit-driven actions by combining optimizer intent with more tolerant
        environmental context, suitable for early-morning predictive actions.
        """
        print("DEBUG (Opportunistic Check): Looking for profit-maximization signals...")

        if control_action_node not in ['uC', 'uQh']:
            return None

        try:
            data_row = self.data.loc[self.find_nearest_timestamp(timestamp)]
        except Exception:
            return None

        # --- STEP 1: Check for Optimizer's Intent (The "Why") ---
        biomass_lg_val = data_row.get(self.node_to_column.get('B_ieq'))
        if pd.isna(biomass_lg_val) or biomass_lg_val > -1e-9:
            return None

        # --- STEP 2: Check for Favorable Environmental Conditions (The "When") ---
        qrad_value = data_row.get(self.node_to_column.get('Qrad'))
        t_value = data_row.get(self.node_to_column.get('T'))
        
        if pd.isna(qrad_value) or pd.isna(t_value):
            return None

        # <<< MODIFICATION START: More tolerant thresholds for early morning >>>
        # The controller acts predictively. Any sign of usable light is an opportunity.
        qrad_threshold = 25.0  # A low, fixed threshold to detect the start of the day.
        is_light_sufficient = qrad_value > qrad_threshold
        
        t_optimal_range = self.constraint_limits.get('T', {}).get('optimal_range', [18, 26])
        # Allow temperature to be slightly below the optimal range, as it's still warming up.
        temp_tolerance = 1.0 
        is_temp_optimal = (t_optimal_range[0] - temp_tolerance) <= t_value <= (t_optimal_range[1] + temp_tolerance)
        # <<< MODIFICATION END >>>
        
        # Both conditions must still be met.
        if not (is_light_sufficient and is_temp_optimal):
            print(f"DEBUG (Opportunistic Check): Conditions not met. Light OK? {is_light_sufficient} (val:{qrad_value}), Temp OK? {is_temp_optimal} (val:{t_value})")
            return None

        # --- STEP 3: Calculate Confidence Score ---
        # If we get here, the conditions are met. This is a strong hypothesis.
        confidence_score = 7.5
        optimizer_bonus = min(2.0, abs(biomass_lg_val / 5e-9))
        confidence_score += optimizer_bonus
        confidence_score = min(confidence_score, 9.6)

        print(f"DEBUG (Opportunistic Expert): SUCCESS! Score={confidence_score:.2f} (Optimizer Bonus: {optimizer_bonus:.2f})")
        return {
            'confidence': confidence_score,
            'primary_reason': "The action was part of a profit-maximization strategy, driven by the high economic value placed on increasing crop biomass.",
            'causal_driver_type': "Opportunistic Action (Profit Maximization)",
            'supporting_evidence': {
                'Optimizer\'s Objective': f"The biomass Lagrangian multiplier ({biomass_lg_val:.2E}) was strongly negative, providing direct proof that the controller's main goal was to increase the final yield.",
                'Favorable Conditions': f"This strategy was viable because the controller identified the start of excellent growing conditions, with usable solar radiation ({qrad_value:.1f} W/m²) and a near-optimal temperature ({t_value:.1f}°C)."
            }
        }


    def _analyze_observed_effects(self, control_action_node: str, timestamp: datetime) -> Dict[str, Any]:
        """
        (DEFINITIVE - V17)
        Analyzes observed effects by performing a linear regression (polyfit) on a SMOOTHED
        version of the data, using CLEAN, NON-OVERLAPPING windows. This is the definitive
        and most robust method, with the critical copy-paste bug fixed.
        """
        print(f"DEBUG - _analyze_observed_effects (V17): Analyzing effects for '{control_action_node}' after {timestamp}.")
        observed_effects = {'summary': "No significant effects could be determined.", 'influenced_variables': {}}
        
        states_to_observe = self.get_states_typically_affected_by_control(control_action_node)
        if not states_to_observe: return observed_effects

        for state_node in states_to_observe:
            state_col = self.node_to_column.get(state_node)
            if not state_col or state_col not in self.data.columns: continue

            try:
                # --- Define Clean, Non-Overlapping Windows ---
                window_duration_minutes = 30
                
                baseline_end_time = timestamp - timedelta(minutes=5) # Ends one step BEFORE the event
                baseline_start_time = baseline_end_time - timedelta(minutes=window_duration_minutes)
                
                observation_start_time = timestamp # Starts AT the event
                observation_end_time = observation_start_time + timedelta(minutes=window_duration_minutes)

                slice_before = self.data.loc[baseline_start_time:baseline_end_time, state_col].dropna()
                slice_after = self.data.loc[observation_start_time:observation_end_time, state_col].dropna()

                if len(slice_before) < 4 or len(slice_after) < 4:
                    print(f"WARN (V17 Effects): Not enough data for {state_node} in clean windows.")
                    continue
                    
                # Smooth the data to remove high-frequency noise.
                smoothed_slice_before = slice_before.rolling(window=3, min_periods=2, center=True).mean().dropna()
                smoothed_slice_after = slice_after.rolling(window=3, min_periods=2, center=True).mean().dropna()
                
                if len(smoothed_slice_before) < 2 or len(smoothed_slice_after) < 2:
                    print(f"WARN (V17 Effects): Not enough data for {state_node} after smoothing.")
                    continue
                
                # --- Perform a linear regression (polyfit) on the CLEANED, SMOOTHED data. ---
                time_numeric_before = (smoothed_slice_before.index - smoothed_slice_before.index[0]).total_seconds()
                slope_before_per_sec, _ = np.polyfit(time_numeric_before, smoothed_slice_before, 1)
                slope_before_per_hour = slope_before_per_sec * 3600

                time_numeric_after = (smoothed_slice_after.index - smoothed_slice_after.index[0]).total_seconds()
                
                # <<< THE DEFINITIVE FIX IS HERE >>>
                # Use the correct 'smoothed_slice_after' data for the 'after' calculation.
                slope_after_per_sec, _ = np.polyfit(time_numeric_after, smoothed_slice_after, 1)
                # <<< END OF FIX >>>
                
                slope_after_per_hour = slope_after_per_sec * 3600

                trend_summary = f"successfully altered the trend. The rate of change improved significantly from **{slope_before_per_hour:+.2f} units/hour** to **{slope_after_per_hour:+.2f} units/hour**"

                if np.sign(slope_before_per_hour) != np.sign(slope_after_per_hour) and abs(slope_before_per_hour) > 0.1:
                    trend_summary += ", successfully reversing the previous trend."
                elif abs(slope_after_per_hour) < abs(slope_before_per_hour) * 0.5:
                    trend_summary += ", effectively stabilizing the variable."

                observed_effects['influenced_variables'][state_node] = {
                    'name': self.node_to_name.get(state_node, state_node),
                    'trend_summary': trend_summary,
                }

            except Exception as e:
                print(f"WARN (V17 Effects): Error during effect analysis for {state_node}: {e}")

        if observed_effects['influenced_variables']:
            summary_parts = []
            for info in observed_effects['influenced_variables'].values():
                summary_parts.append(f"for **{info['name']}**, it {info['trend_summary']}")
            observed_effects['summary'] = "Following the action, the key observed effect on the system was that " + "; ".join(summary_parts)

        return observed_effects

    def _get_pcmci_lag_for_pair(self, source_node: str, target_node: str) -> Union[int, None]:
        """
        (NEW HELPER)
        Finds the specific, strongest significant time lag in minutes from a source node
        to a target node using pre-computed PCMCI results.

        Returns:
            int: The lag in minutes, or None if no significant link is found.
        """
        if not hasattr(self, 'pcmci_results') or not self.pcmci_results:
            return None

        # Map the friendly control action node to its canonical name used in PCMCI
        source_pcmci_node = self.get_pcmci_node_for_control(source_node)
        
        source_idx = self.pcmci_var_to_idx.get(source_pcmci_node)
        target_idx = self.pcmci_var_to_idx.get(target_node)

        if source_idx is None or target_idx is None:
            return None

        causal_graph = self.pcmci_results['graph']
        p_matrix = self.pcmci_results['p_matrix']
        val_matrix = self.pcmci_results['val_matrix']
        alpha = self.pcmci_results.get('alpha', 0.05)

        strongest_link_lag = -1
        max_abs_strength = 0

        for lag in range(1, len(causal_graph)):
            # Check bounds for safety
            if (target_idx < causal_graph[lag].shape[0] and
                source_idx < causal_graph[lag].shape[1] and
                causal_graph[lag][target_idx, source_idx] == '-->' and
                p_matrix[lag][target_idx, source_idx] < alpha):
                
                current_strength = val_matrix[lag][target_idx, source_idx]
                if abs(current_strength) > max_abs_strength:
                    max_abs_strength = abs(current_strength)
                    strongest_link_lag = lag
        
        if strongest_link_lag != -1:
            # Assuming your data has a 5-minute frequency
            return strongest_link_lag * 5 
        
        return None

    def get_pcmci_node_for_control(self, control_node: str) -> str | None:
        """
        Translates any control-related node name (e.g., 'uV_ref', 'Vent_ref', 'uV')
        to the canonical KG node name that was used as a `var_name` in the PCMCI analysis.
        Returns None if no valid mapping is found.
        """
        # 1. Start with the input node
        candidate_node = control_node
        
        # 2. If it's a column name, map it to a KG node
        if candidate_node in self.column_to_node:
            candidate_node = self.column_to_node[candidate_node]

        # 3. If it's a reference node, map it to its base code
        # e.g., 'T_ref' might not be a control, but this handles 'uV_ref' -> 'uV'
        if candidate_node in self.ref_node_to_base_code:
            candidate_node = self.ref_node_to_base_code[candidate_node]
            
        # 4. At this point, `candidate_node` should be a base KG node (e.g., 'uV').
        # Now, we verify if this node was actually included in the last PCMCI run.
        if hasattr(self, 'pcmci_results') and self.pcmci_results:
            pcmci_var_names = self.pcmci_results.get('var_names', [])
            if candidate_node in pcmci_var_names:
                print(f"DEBUG - Successfully mapped '{control_node}' to PCMCI var_name '{candidate_node}'.")
                return candidate_node
            else:
                print(f"WARN - Node '{candidate_node}' (derived from '{control_node}') was NOT FOUND in the PCMCI variable list.")
                return None
        else:
            print("WARN - PCMCI results are not available for mapping.")
            return None


    def analyze_competing_fluxes(self, flux_nodes: List[str], target_node: str, start_dt: datetime, end_dt: datetime) -> Dict[str, Any]:
        """
        Analyzes the net effect of two or more competing fluxes on a target variable.

        The flux columns are summed row by row over ``[start_dt, end_dt]`` and the
        mean of that sum is reported as the net flux, together with the mean of each
        individual flux and the mean of the target column.

        The conclusion claims near-equilibrium only when the absolute mean net flux
        is below 50 (the same "near-zero" threshold that gates the reduced-demand
        remark). Otherwise it states that the forces did not balance.

        Args:
            flux_nodes: Nodes whose columns hold the competing fluxes.
            target_node: Node whose column holds the controller action being explained.
            start_dt: Start of the analysis window (inclusive label slice).
            end_dt: End of the analysis window (inclusive label slice).

        Returns:
            ``{'conclusion': str, 'evidence': dict}`` on success, or
            ``{'error': str}`` when the window is empty, cannot be sliced, or a
            required column is missing.
        """
        print(f"DEBUG - Analyzing competing fluxes {flux_nodes} on target {target_node}")

        try:
            window_data = self.data.loc[start_dt:end_dt]
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}
        if window_data.empty:
            return {'error': "No data in the specified time range."}

        flux_cols = [self.node_to_column.get(node) for node in flux_nodes]
        target_col = self.node_to_column.get(target_node)

        if not all(col and col in window_data.columns for col in flux_cols + [target_col]):
            return {'error': "Missing one or more necessary data columns for flux analysis."}

        # Thresholds for "near-zero" net flux and controller action.
        near_zero_net_flux = 50
        near_zero_action = 0.01

        # 1. Calculate the net flux (kept as a separate Series; the window is not mutated)
        avg_net_flux = window_data[flux_cols].sum(axis=1).mean()

        # 2. Quantify each contributing flux
        flux_contributions = {}
        for node, col in zip(flux_nodes, flux_cols):
            avg_flux = window_data[col].mean()
            flux_contributions[node] = {
                'name': self.node_to_name.get(node, node),
                'average_value': avg_flux,
                'effect': 'heating' if avg_flux > 0 else 'cooling'
            }

        # 3. Analyze the system's response
        avg_target_action = window_data[target_col].mean()
        target_name = self.node_to_name.get(target_node, target_node)

        # 4. Synthesize the conclusion
        forces = "two opposing forces" if len(flux_nodes) == 2 else "competing forces"
        is_near_equilibrium = abs(avg_net_flux) < near_zero_net_flux

        if is_near_equilibrium:
            conclusion = f"The {forces} were in a state of near-equilibrium, resulting in a net natural heat load of approximately {avg_net_flux:.2f} W/m²."
            if abs(avg_target_action) < near_zero_action:
                conclusion += f" This balance significantly reduced the demand on the active {target_name} system."
        else:
            # Do not claim a balance the numbers do not support.
            conclusion = f"The {forces} were not in balance, resulting in a net natural heat load of approximately {avg_net_flux:.2f} W/m²."

        return {
            'conclusion': conclusion,
            'evidence': {
                "Competing Forces": flux_contributions,
                "Net Effect": f"The average net flux from these natural sources was {avg_net_flux:.2f} W/m².",
                "System Response": f"As a result, the controller only needed to apply an average of {avg_target_action:.4f} units of {target_name}."
            }
        }


    def _check_for_free_co2_maneuver_v2(self, timestamp: datetime) -> Dict[str, Any] | None:
        """
        (V2 - META EXPERT)
        Specifically looks for the "free CO2 inhalation" pattern, where ventilation is
        used to import high-concentration CO2 from the outside at night.
        """
        # This pattern only occurs in the pre-dawn hours.
        if not (2 <= timestamp.hour < 6):
            return None

        print("DEBUG (Free CO2 Expert): Checking for 'CO2 inhalation' maneuver...")
        
        try:
            data_row = self.data.loc[self.find_nearest_timestamp(timestamp)]
        except Exception:
            return None

        # --- Signature of the "Free CO2" Maneuver ---
        
        # 1. Is ventilation active (and optionally, heating to compensate)?
        is_venting_active = abs(data_row.get(self.node_to_column.get('uV'), 0)) > 1e-10
        
        if not is_venting_active:
            return None

        # 2. Is there a favorable CO2 gradient? (Higher outside than inside)
        c_in = data_row.get(self.node_to_column.get('C'))
        c_out = data_row.get(self.node_to_column.get('Cout'))
        is_co2_opportunity = pd.notna(c_in) and pd.notna(c_out) and c_out > (c_in + 20) # Require a decent gradient
        
        if not is_co2_opportunity:
            return None

        # 3. Is the optimizer's goal to increase CO2? (Confirmation)
        ceq_lg = data_row.get(self.node_to_column.get('C_eq'))
        is_goal_increase_co2 = pd.notna(ceq_lg) and ceq_lg < -1 # Strong signal to increase CO2

        if not is_goal_increase_co2:
            return None
            
        # If all three conditions are met, we have found the pattern with very high confidence.
        
        # SCORING: This is a highly specific, advanced, and counter-intuitive strategy.
        # When found, it's a very strong explanation.
        confidence_score = 9.2
        
        print(f"SUCCESS (Free CO2 Expert): Identified cost-saving CO2 inhalation strategy. Score: {confidence_score}")
        return {
            'confidence': confidence_score,
            'primary_reason': "This was an intelligent, cost-saving action to get a head start on the day's CO₂ requirements.",
            'causal_driver_type': "Coordinated Action (Free CO₂ Fertilization)",
            'supporting_evidence': {
                'The Goal': f"The controller's primary objective was to raise the internal CO₂ concentration for the coming day, proven by the strong negative signal from the CO₂ Equality Lagrangian ({ceq_lg:.2f}).",
                'The Opportunity': f"It identified that the outside night air was rich in CO₂ ({c_out:.1f} ppm) compared to the inside ({c_in:.1f} ppm), providing a 'free' source of CO₂.",
                'The Coordinated Action': "Instead of immediately starting the expensive CO₂ injectors, the controller opened the vents to 'inhale' this free CO₂. It likely ran the heater simultaneously to compensate for the predictable heat loss, calculating that this brief, combined action was more cost-effective."
            }
        }

    def format_sudden_event_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Formats the analysis of sudden events for the LLM, focusing on the most significant change.

        Args:
            analysis_result: Dict that may contain 'error', 'overall_pattern_summary'
                (read for 'variable_name' and 'mean_value') and 'notable_events_analysis'
                (a list of events, each read for 'basic_event_info' and 'flux_context').

        Returns:
            The newline-joined context string. A missing or non-numeric mean value is
            rendered as "N/A" instead of raising while formatting.
        """
        if 'error' in analysis_result:
            return f"Error during analysis: {analysis_result['error']}"

        summary = analysis_result.get('overall_pattern_summary') or {}
        events = analysis_result.get('notable_events_analysis') or []

        # Formatting None (or any non-number) with ':.2f' raises, so fall back explicitly.
        try:
            mean_text = f"{summary.get('mean_value'):.2f}"
        except (TypeError, ValueError):
            mean_text = "N/A"

        context = [
            "--- Causal Analysis of Sudden Events ---",
            f"\n**Overall Context:** The variable '{summary.get('variable_name', 'unknown')}' averaged {mean_text} during this period.",
        ]

        if not events:
            context.append("\nNo significant sudden events were detected in this time range.")
            return "\n".join(context)

        def change_size(event):
            # Rank by absolute size so a large drop (negative magnitude) is not ignored.
            info = event.get('basic_event_info') or {}
            return abs(info.get('change_magnitude') or 0)

        # Focus on the most significant event (largest change)
        most_significant_event = max(events, key=change_size)
        event_info = most_significant_event.get('basic_event_info') or {}
        ts = event_info.get('timestamp')
        change_type = event_info.get('change_type', 'change')

        context.append(
            f"\n**Most Significant Event:** A **{change_type}** occurred around **{ts}**."
        )

        # Explain the cause using the flux analysis for THAT event
        flux_context = most_significant_event.get('flux_context') or {}
        positive_fluxes = flux_context.get('positive_fluxes') or {}
        negative_fluxes = flux_context.get('negative_fluxes') or {}

        if positive_fluxes or negative_fluxes:
            context.append("\n**Primary Causes at that Moment:**")
            context.extend(
                f"- **Heating Force:** {name} (Value: {value:.2f})"
                for name, value in positive_fluxes.items()
            )
            context.extend(
                f"- **Cooling Force:** {name} (Value: {value:.2f})"
                for name, value in negative_fluxes.items()
            )

        return "\n".join(context)

    def format_gradual_trend_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Render a gradual-trend analysis as LLM context, highlighting the dominant forces.

        Args:
            analysis_result: Dict that may contain 'error', 'trend_description',
                'dominant_heating_force', 'dominant_cooling_force' (each with 'name'
                and 'average_contribution') and 'net_effect_explanation'.

        Returns:
            The newline-joined context string, or an error message when
            ``analysis_result`` carries an 'error' key.
        """
        if 'error' in analysis_result:
            return f"Error during analysis: {analysis_result['error']}"

        trend_text = analysis_result.get('trend_description', 'A gradual change was observed.')
        lines = [
            "--- Causal Analysis of a Gradual Trend ---",
            f"\n**Observed Trend:** {trend_text}",
        ]

        # Each force section is optional and only emitted when the analysis found one.
        warming = analysis_result.get('dominant_heating_force')
        if warming:
            lines += [
                f"\n**Primary Heating Force (Source of Increase):**\n"
                f"- **Name:** {warming['name']}\n"
                f"- **Average Contribution:** {warming['average_contribution']:.2f} W/m²"
            ]

        cooling = analysis_result.get('dominant_cooling_force')
        if cooling:
            lines += [
                f"\n**Primary Cooling Force (Counteracting Force):**\n"
                f"- **Name:** {cooling['name']}\n"
                f"- **Average Contribution:** {cooling['average_contribution']:.2f} W/m²"
            ]

        net_text = analysis_result.get('net_effect_explanation', 'The net effect could not be determined.')
        lines.append(f"\n**Net Effect Explanation:** {net_text}")

        return "\n".join(lines)

    def find_and_explain_reaction(self, disturbance_node: str, reaction_nodes: List[str], date: datetime.date) -> Dict[str, Any]:
        """
        Finds the largest single-step drop of a disturbance variable on a given day and
        reports how the specified reaction variables changed across that same step.

        Args:
            disturbance_node: Node whose column is searched for the drop. For 'Qrad'
                only rows with a value above 10 are considered (daytime filter).
            reaction_nodes: Nodes whose values are compared between the row before the
                drop and the row at the drop. Nodes without a data column are skipped.
            date: Calendar day to analyse. Anything ``pd.Timestamp`` accepts works
                (date, datetime, Timestamp); only the calendar day is used.

        Returns:
            ``{'event': {...}, 'reactions': [...]}`` on success, or ``{'error': str}``
            when the column or data is missing or the variable never dropped that day.

        NOTE(review): after the 'Qrad' daytime filter, consecutive rows may no longer be
        adjacent in time, so a "step" can span a gap - confirm this is acceptable.
        """
        print(f"DEBUG - Finding event for '{disturbance_node}' and analyzing reactions of {reaction_nodes} on {date}.")
        analysis_result = {}

        disturbance_col = self.node_to_column.get(disturbance_node)
        if not disturbance_col or disturbance_col not in self.data.columns:
            return {'error': f"Data column for disturbance '{disturbance_node}' not found."}

        try:
            # Normalise to a plain date: comparing index dates with a datetime/Timestamp
            # is always False and would silently select no rows.
            target_day = pd.Timestamp(date).date()
            day_data = self.data[self.data.index.date == target_day]
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}
        if day_data.empty:
            return {'error': "No data for the specified date."}

        # --- 1. Find the Event: The largest single drop in the disturbance ---
        # We only care about daytime events for solar radiation
        if disturbance_node == 'Qrad':
            day_data = day_data[day_data[disturbance_col] > 10].copy()

        if len(day_data) < 2:
            if disturbance_node == 'Qrad':
                return {'error': "Not enough daytime data to find a solar radiation drop."}
            return {'error': f"Not enough data to find a drop in '{disturbance_node}'."}

        # .diff() is the change from the previous step; the most negative one is the biggest drop.
        changes = day_data[disturbance_col].diff()
        # `not (x < 0)` also covers the all-NaN case, where min() is NaN.
        if not changes.min() < 0:
            return {'error': f"No drop in '{disturbance_node}' was found on the specified date."}
        event_timestamp = changes.idxmin()

        # The event needs a preceding row to compare against.
        event_pos = day_data.index.get_loc(event_timestamp)
        if event_pos == 0:
            return {'error': "Could not find a valid preceding data point for the event."}

        data_at_event_row = day_data.iloc[event_pos]
        data_before_event_row = day_data.iloc[event_pos - 1]
        value_at_event = data_at_event_row[disturbance_col]
        value_before_event = data_before_event_row[disturbance_col]

        analysis_result['event'] = {
            'trigger_node': disturbance_node,
            'trigger_name': self.node_to_name.get(disturbance_node, disturbance_node),
            'timestamp': event_timestamp,
            'description': f"The most significant drop occurred around {event_timestamp.strftime('%H:%M')}, "
                        f"when the value fell from {value_before_event:.2f} to {value_at_event:.2f}."
        }

        # --- 2. Analyze the Reactions ---
        analysis_result['reactions'] = []
        for reaction_node in reaction_nodes:
            reaction_col = self.node_to_column.get(reaction_node)
            if not reaction_col or reaction_col not in day_data.columns:
                continue

            value_at = data_at_event_row.get(reaction_col)
            value_before = data_before_event_row.get(reaction_col)

            # Only compute a change when both readings are present.
            if pd.notna(value_at) and pd.notna(value_before):
                change = value_at - value_before
            else:
                change = None

            analysis_result['reactions'].append({
                'node': reaction_node,
                'name': self.node_to_name.get(reaction_node, reaction_node),
                'value_before': value_before,
                'value_at': value_at,
                'change': change
            })

        return analysis_result

    def analyze_system_event(self, timestamp: datetime, target_nodes: list) -> Dict[str, Any]:
        """
        (DEFINITIVE, CONTEXT-AWARE VERSION)
        Analyzes system-wide events with a more robust lookback logic to correctly
        identify triggers like sunset that are processes, not single-step events.
        """
        print(f"DEBUG (System Event Analysis): Analyzing coordinated event at {timestamp} affecting {target_nodes}")
        
        try:
            nearest_timestamp = self.find_nearest_timestamp(timestamp)
            if nearest_timestamp is None:
                return {'trigger': 'Unknown', 'confidence': 0.0, 'reasoning': 'No data available'}
            event_idx = self.data.index.get_loc(nearest_timestamp)
            data_at_event = self.data.iloc[event_idx]
            data_before_event = self.data.iloc[max(0, event_idx - 1)]
            window_slice = self.data.iloc[max(0, event_idx - 12):event_idx]
            # Define a 1-hour lookback window to understand the preceding context
            lookback_start_time = timestamp - pd.Timedelta(hours=1)
            preceding_hour_data = self.data.loc[lookback_start_time:timestamp]
        except (IndexError, KeyError) as e:
            return {'error': f"Could not retrieve data for system event analysis: {e}"}

        analysis_result = {
            'event_type': 'Unknown System Event',
            'trigger': 'Unknown',
            'explanation': 'Could not determine the cause of the system-wide event.',
            'evidence': {},
            'confidence': 0.0  # Default confidence for unknown events
        }

        qrad_col = self.node_to_column.get('Qrad')
        tout_col = self.node_to_column.get('Tout')
        t_ref_col = self.node_to_column.get('T_ref')
        tieq_lg_col = self.node_to_column.get('T_ieq')
        cieq_lg_col = self.node_to_column.get('C_ieq')
        hieq_lg_col = self.node_to_column.get('H_ieq')
    
        qrad_at = data_at_event.get(qrad_col)
        qrad_before = data_before_event.get(qrad_col)
        tout_at = data_at_event.get(tout_col)
        if timestamp.hour >= 22 or timestamp.hour < 4: # Check if it's late night/early morning
            tieq_val = data_at_event.get(tieq_lg_col, 0)
            cieq_val = data_at_event.get(cieq_lg_col, 0)
            hieq_val = data_at_event.get(hieq_lg_col, 0)

            # The signature of a stable system is that all constraint multipliers are near zero.
            is_stable = abs(tieq_val) < self.config['lagrangian_active_threshold']['T_ieq'] and \
                    abs(cieq_val) < self.config['lagrangian_active_threshold']['C_ieq'] and \
                    abs(hieq_val) < self.config['lagrangian_active_threshold']['H_ieq']

            if is_stable:
                analysis_result.update({
                    'confidence': 2.0,
                    'event_type': "Optimal Nighttime Stability (Coasting)",
                    'trigger': "Achievement of a stable, low-energy equilibrium",
                    'explanation': "The lack of control actions represents the controller successfully achieving its optimal nighttime state. All environmental variables are comfortably within their constraints, so the most cost-effective action is to do nothing.",
                    'evidence': {
                        'Optimizer\'s State': "All primary Lagrangian multipliers (Temperature, CO₂, Humidity) are flat and near-zero, which is the mathematical proof that no constraints are active.",
                        'Economic Rationale': "With no major disturbances, the controller's optimal decision is to minimize cost by taking no action, demonstrating it has found an efficient equilibrium."
                    }
                })
                print("SUCCESS: Identified Stable Nighttime Coasting.")
                return analysis_result
            
        if qrad_at is not None:
            qrad_max_before = preceding_hour_data[qrad_col].max()
            qrad_at_event = data_at_event.get(qrad_col)
            is_daytime_before = qrad_max_before > 150
            is_nighttime_now = qrad_at < 75
            is_sunset = is_daytime_before and is_nighttime_now
            
            is_sunrise = (qrad_max_before < 5 and qrad_at > 50)
            
            if pd.notna(qrad_at_event) and pd.notna(qrad_max_before):

                # --- SIGNATURE 1: Sunset Event ---
                was_daytime_recently = qrad_max_before > 100  # Was there useful light?
                is_nighttime_now = qrad_at_event < 50      # Is it dark now?

                if was_daytime_recently and is_nighttime_now:
                    analysis_result.update({
                        'confidence': 9.5,
                        'event_type': "Day-to-Night Regime Change",
                        'trigger': "Sunset (Solar Radiation became insufficient for growth)",
                        'explanation': (
                            "This was an excellent and necessary trade-off. The controller correctly identified that with the sun setting, the potential for profitable growth was gone. "
                        "It immediately and correctly switched to a dual-purpose cost-saving and health-preservation mode for the night."
                    ),
                    'evidence': {
                        'The Trigger (Sensor Data)': f"In the hour leading up to {timestamp.strftime('%H:%M')}, solar radiation was at a useful level (peaking at {qrad_max_before:.1f} W/m²), but it dropped to a photosynthetically useless level of {qrad_at_event:.1f} W/m² at the time of the change.",
                        'Action 1 (Cost Saving)': "It shut off CO₂ injection to stop wasting expensive resources when the plants could no longer use them. Continuing to inject CO₂ would have resulted in a financial loss with no potential for growth.",
                        'Action 2 (Health Preservation)': "It proactively lowered the humidity target to prevent condensation on the plants as temperatures cool, which is critical for preventing crop diseases like botrytis overnight. This prioritizes long-term crop health."
                    }
                    })
                    print("SUCCESS: Identified Day-to-Night Transition based on sensor data.")
                    return analysis_result

                # --- SIGNATURE 2: Sunrise Event ---
                was_nighttime_recently = qrad_max_before < 10 # Was it dark?
                is_daytime_now = qrad_at_event > 50         # Is there useful light now?

                if was_nighttime_recently and is_daytime_now:
                    analysis_result.update({
                        'confidence': 9.5,
                        'event_type': "Night-to-Day Regime Change",
                        'trigger': "Sunrise (Solar Radiation became sufficient for growth)",
                        'explanation': "The controller detected sunrise and switched from a nighttime 'maintenance' mode to a daytime 'aggressive growth' mode.",
                        'evidence': {
                            'The Trigger (Sensor Data)': f"Solar radiation increased from a nighttime level of {qrad_max_before:.1f} W/m² to a useful daytime level of {qrad_at_event:.1f} W/m².",
                            'Strategic Shift': "This triggered a coordinated increase in setpoints for CO₂ and humidity to prepare the greenhouse for a productive day of photosynthesis."
                        }
                    })
                    print("SUCCESS: Identified Night-to-Day Transition based on sensor data.")
                    return analysis_result

        # --- REASONING 3: Scheduled Regime Change (Time-Based Fallback) ---
        if timestamp.hour == 18 and timestamp.minute == 0:
            if t_ref_col and t_ref_col in data_at_event.index:
                t_ref_at = data_at_event.get(t_ref_col)
                t_ref_before = data_before_event.get(t_ref_col)
                if pd.notna(t_ref_at) and pd.notna(t_ref_before) and (t_ref_before - t_ref_at > 2):
                    analysis_result.update({
                        'event_type': "Scheduled 'End of Day' Regime Change",
                        'trigger': "Pre-programmed time-based schedule (18:00)",
                        'explanation': "The coordinated drop in setpoints appears to be a scheduled transition from a 'daytime' to a 'nighttime' mode, triggered by the clock rather than a sensor.",
                        'evidence': {
                            'Trigger Data': f"The event occurred precisely at 18:00 and involved a significant drop in the Temperature setpoint from {t_ref_before:.1f}°C to {t_ref_at:.1f}°C.",
                            'Operational Context': "This is a common strategy to begin saving energy before complete sunset."
                        }
                    })
                    print("SUCCESS: Identified Scheduled Regime Change.")
                    return analysis_result
                
        # --- REASONING 3.5: Evening Sunset Transition ---
        if timestamp.hour >= 17 and timestamp.hour <= 20:
            analysis_result.update({
                'confidence': 7.0,
                'event_type': "Day-to-Night Regime Change",
                'trigger': "Evening time - sunset transition",
                'explanation': "The coordinated change in setpoints marks the controller's transition from 'daytime growth' to 'nighttime maintenance' mode, triggered by the approach of sunset.",
                'evidence': {
                    'Trigger Data': f"The event occurred at {timestamp.hour}:00, which is evening time when solar radiation naturally decreases.",
                    'Economic Rationale': "With diminishing light for photosynthesis, the economic incentive shifts from maximizing growth to minimizing costs, triggering a regime change to nighttime maintenance mode."
                }
            })
            print("SUCCESS: Identified Evening Sunset Transition.")
            return analysis_result
                
        # --- REASONING 4: Sudden Cloud Cover / Rain Event (Daytime) ---
        is_cloud_event = (qrad_col and pd.notna(qrad_at) and pd.notna(qrad_before) and
                        qrad_before > 200 and (qrad_at < qrad_before * 0.5))
        if is_cloud_event:
            analysis_result.update({
                'confidence': 7.5,
                'event_type': "Cloud Cover Compensation",
                'trigger': "Sudden drop in solar radiation during daytime",
                'explanation': "The system adjusted its strategy to compensate for a sudden loss of solar energy, likely due to heavy cloud cover. It is reacting to maintain stability.",
                'evidence': {
                    'Trigger Data': f"Solar Radiation dropped sharply from {qrad_before:.1f} W/m² to {qrad_at:.1f} W/m².",
                    'Controller Response': "The controller likely reduced cooling and may have activated heating to compensate for the loss of solar heat gain, while also adjusting CO₂ injection as photosynthesis potential decreased."
                }
            })
            print("SUCCESS: Identified Cloud Cover Event.")
            return analysis_result
                
        # --- REASONING 5: Extreme Weather Response (Heatwave or Cold Snap) ---
        EXTREME_HEAT_THRESHOLD = 30
        EXTREME_COLD_THRESHOLD = 5
        if tout_col and pd.notna(tout_at) and not window_slice.empty:
            # Heatwave check
            if tout_at > EXTREME_HEAT_THRESHOLD and window_slice[tout_col].is_monotonic_increasing:
                analysis_result.update({
                    'confidence': 9.2,
                    'event_type': "Defensive Mode: Heatwave",
                    'trigger': "Sustained and extreme outside temperature",
                    'explanation': "The controller has entered a defensive mode to protect the crop from an external heatwave. Growth objectives are likely secondary to preventing catastrophic heat stress.",
                    'evidence': {
                        'Trigger Data': f"Outside temperature reached an extreme level of {tout_at:.1f}°C and was trending upwards.",
                        'Controller Response': "Actions are now focused on maximum cooling. Ventilation may be shut to prevent bringing in more heat, and other targets relaxed to focus all energy on survival."
                    }
                })
                print("SUCCESS: Identified Heatwave Response.")
                return analysis_result
                
            # Cold snap check
            if tout_at < EXTREME_COLD_THRESHOLD and window_slice[tout_col].is_monotonic_decreasing:
                analysis_result.update({
                    'confidence': 9.2,
                    'event_type': "Defensive Mode: Cold Snap",
                    'trigger': "Sustained and extreme outside temperature",
                    'explanation': "The controller has entered a defensive mode to protect the crop from an external cold snap. Growth objectives are secondary to preventing frost damage.",
                    'evidence': {
                        'Trigger Data': f"Outside temperature dropped to an extreme level of {tout_at:.1f}°C and was trending downwards.",
                        'Controller Response': "Actions are focused on maximum heating. Ventilation will be minimized to prevent heat loss, and other setpoints adjusted to conserve energy for heating."
                    }
                })
                print("SUCCESS: Identified Cold Snap Response.")
                return analysis_result

        # --- REASONING 6: Pre-Dawn Ventilation for CO2 Pre-Charging ---
        # Check for early morning ventilation spike to inhale free CO2
        if timestamp.hour >= 3 and timestamp.hour <= 6:  # Pre-dawn hours
            # Get relevant columns
            vent_col = self.node_to_column.get('uV')
            heat_col = self.node_to_column.get('uQh')
            co2_in_col = self.node_to_column.get('C')
            co2_out_col = self.node_to_column.get('Cout')
            ceq_lg_col = self.node_to_column.get('C_eq')
            
            if all(col and col in self.data.columns for col in [vent_col, heat_col, co2_in_col, co2_out_col, ceq_lg_col]):
                vent_at = data_at_event.get(vent_col)
                heat_at = data_at_event.get(heat_col)
                co2_in_at = data_at_event.get(co2_in_col)
                co2_out_at = data_at_event.get(co2_out_col)
                ceq_lg_at = data_at_event.get(ceq_lg_col)
                
                # Check conditions: ventilation active, heating active, outside CO2 > inside, optimizer wants more CO2
                is_vent_active = pd.notna(vent_at) and vent_at > 0.1  # Ventilation is on
                is_heat_active = pd.notna(heat_at) and heat_at > 0.1  # Heating is on
                co2_gradient_favorable = pd.notna(co2_out_at) and pd.notna(co2_in_at) and co2_out_at > co2_in_at + 50  # Outside CO2 significantly higher
                optimizer_wants_co2 = pd.notna(ceq_lg_at) and ceq_lg_at < -1e-9  # Strong desire to increase CO2
                
                if is_vent_active and is_heat_active and co2_gradient_favorable and optimizer_wants_co2:
                    analysis_result.update({
                        'confidence': 8.5,
                        'event_type': "Pre-Dawn CO2 Pre-Charging",
                        'trigger': "Cost-saving opportunity to inhale free CO2 before sunrise",
                        'explanation': "The controller performed a brief, coordinated ventilation and heating action to pre-charge the greenhouse with free CO2 from outside air, avoiding the need for expensive CO2 injection later in the day.",
                        'evidence': {
                            'Physical Evidence': f"Outside CO2 concentration ({co2_out_at:.0f} ppm) was significantly higher than inside ({co2_in_at:.0f} ppm), creating a favorable gradient for natural CO2 intake.",
                            'Optimizer Evidence': f"The CO2 equality constraint Lagrangian ({ceq_lg_at:.2E}) shows a strong desire to increase internal CO2 concentration.",
                            'Controller Actions': f"Ventilation was activated ({vent_at:.3f}) while heating ({heat_at:.3f}) compensated for heat loss, making this a cost-effective pre-dawn maneuver."
                        }
                    })
                    print("SUCCESS: Identified Pre-Dawn CO2 Pre-Charging.")
                    return analysis_result

        return analysis_result

    def analyze_gradual_trend(self, primary_node: str, date: date, start_time: time, end_time: time) -> Dict[str, Any]:
        """
        Analyze a slow, gradual trend by averaging the fluxes that feed `primary_node`.

        Every knowledge-graph predecessor of `primary_node` whose node type is
        'Flux' is averaged over the window [start_time, end_time] on `date`.
        The fluxes are then split by the 'relationship' attribute of their edge
        to `primary_node` ('+' or '-') and the strongest one of each sign is
        reported.

        Args:
            primary_node: Knowledge-graph node whose trend is being explained.
            date: Calendar day of the window.
            start_time: Start of the window (inclusive label slice).
            end_time: End of the window (inclusive label slice).

        Returns:
            A dict with 'trend_description', 'dominant_heating_force',
            'dominant_cooling_force' and 'net_effect_explanation', or
            {'error': message} when the window holds no data, no flux
            influences the node, or none of the fluxes has usable data.
        """
        # Function-scope import kept from the original so that `datetime` is
        # guaranteed to be the class (not the module) inside this method.
        from datetime import datetime
        print(f"DEBUG (Gradual Trend Expert): Analyzing {primary_node} from {start_time} to {end_time}.")

        try:
            start_dt = datetime.combine(date, start_time)
            end_dt = datetime.combine(date, end_time)
            window_data = self.data.loc[start_dt:end_dt]
            if window_data.empty:
                return {'error': "No data for this time."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        # --- 1. Identify the physical fluxes that influence the primary_node ---
        influencing_fluxes = [
            predecessor
            for predecessor in self.G.predecessors(primary_node)
            if self.G.nodes[predecessor].get('type') == 'Flux'
        ]
        if not influencing_fluxes:
            return {'error': f"No known physical fluxes influence {primary_node}."}

        # --- 2. Calculate the average contribution of each flux over the window ---
        flux_contributions = []
        for flux_node in influencing_fluxes:
            flux_col = self.node_to_column.get(flux_node)
            if flux_col is None or flux_col not in window_data.columns:
                continue
            avg_flux_value = window_data[flux_col].mean()
            # A column with no valid samples averages to NaN. NaN compares False
            # against everything, so max() would keep it whenever it comes first;
            # such a flux carries no evidence and is left out of the ranking.
            if pd.isna(avg_flux_value):
                continue
            edge_data = self.G.get_edge_data(flux_node, primary_node) or {}
            flux_contributions.append({
                'name': self.node_to_name.get(flux_node, flux_node),
                'average_contribution': avg_flux_value,
                'effect_on_target': edge_data.get('relationship')
            })

        # Without a single measured flux there is nothing to base an explanation on.
        if not flux_contributions:
            return {'error': f"No flux data is available for {primary_node} in this time window."}

        # --- 3. Find the most dominant heating and cooling forces ---
        heating_forces = [f for f in flux_contributions if f['effect_on_target'] == '+']
        cooling_forces = [f for f in flux_contributions if f['effect_on_target'] == '-']

        dominant_heating = max(heating_forces, key=lambda x: x['average_contribution']) if heating_forces else None
        dominant_cooling = max(cooling_forces, key=lambda x: abs(x['average_contribution'])) if cooling_forces else None

        # --- 4. Synthesize the explanation ---
        return {
            'trend_description': f"A gradual increase in {self.node_to_name.get(primary_node, primary_node)}.",
            'dominant_heating_force': dominant_heating,
            'dominant_cooling_force': dominant_cooling,
            'net_effect_explanation': "The gradual increase occurred because the heating forces were consistently stronger than the cooling forces during this period."
        }

    def format_control_value(self, value: float) -> str:
        """
        Render a control value as text with precision scaled to its magnitude.

        An exact zero is shown as "0". Magnitudes below 1e-6 use scientific
        notation with two decimals; larger magnitudes get 6, 4, 3 and 2 decimal
        places as they cross 1e-3, 0.01, 1 and 10, and anything from 10 upward
        is shown with a single decimal place.
        """
        magnitude = abs(value)
        if magnitude == 0:
            return "0"

        # (exclusive upper bound on |value|, format spec), checked smallest first.
        precision_bands = (
            (1e-6, ".2E"),
            (1e-3, ".6f"),
            (0.01, ".4f"),
            (1, ".3f"),
            (10, ".2f"),
        )
        for upper_bound, spec in precision_bands:
            if magnitude < upper_bound:
                return format(value, spec)

        # Everything at or above the last band (and anything that fails every
        # comparison) gets one decimal place.
        return format(value, ".1f")

    def format_control_change(self, value_before: float, value_after: float) -> str:
        """
        Describe the transition between two control values as text.

        When the two values differ by less than 0.1% relative to the larger
        magnitude, only the resulting value is reported ("at X"); otherwise
        both ends are reported ("from X to Y").
        """
        delta = abs(value_after - value_before)
        # The 1e-10 floor keeps the division defined when both values are zero.
        scale = max(abs(value_before), abs(value_after), 1e-10)
        is_negligible = delta / scale < 0.001

        if is_negligible:
            return f"at {self.format_control_value(value_after)}"

        before_text = self.format_control_value(value_before)
        after_text = self.format_control_value(value_after)
        return f"from {before_text} to {after_text}"

    def analyze_causal_trend(self, primary_node: str, timestamp: datetime) -> Dict[str, Any]:
        """
        Explain a peak or drop in `primary_node` by the control change that preceded it.

        The 30 minutes leading up to `timestamp` are searched for step changes
        in the columns of the control drivers returned by
        `_find_potential_control_drivers`. Each change is scored by its size
        relative to that column's range inside the window. Changes that would
        push the target upward are preferred; if there are none, the
        highest-scoring change of any kind is used and the explanation text
        reflects its actual direction.

        Args:
            primary_node: Knowledge-graph node whose trend is being explained.
            timestamp: Time of the peak/drop. Only changes at or before the
                nearest data timestamp are considered.

        Returns:
            A dict with 'primary_cause' ({'name', 'description'}) and
            'mechanism_explanation', or {'error': message} when no usable
            control change can be found.
        """
        print(f"DEBUG - V10 Causal Trend: Analyzing trend for '{primary_node}' around {timestamp}.")

        # --- 1. Identify all potential CONTROL ACTION drivers from the Knowledge Graph ---
        potential_drivers = self._find_potential_control_drivers(primary_node)
        if not potential_drivers:
            return {'error': f"No control actions are known to influence '{primary_node}'."}
        print(f"DEBUG - V10 Causal Trend: Potential control drivers are {set(potential_drivers)}")

        # --- 2. Time-Centric Event Search with RELATIVE SIGNIFICANCE ---
        # A peak at 17:30 was caused by something that happened BEFORE 17:30, so
        # the search window ends at the event time instead of being centred on it.
        try:
            search_start = timestamp - timedelta(minutes=30)
            search_end = timestamp
            driver_cols = [self.node_to_column.get(node) for node in set(potential_drivers) if self.node_to_column.get(node)]
            search_data = self.data.loc[search_start:search_end, driver_cols]

            if len(search_data) < 2:
                return {'error': "Not enough data around the event time to analyze."}

            event_timestamp_actual = self.find_nearest_timestamp(timestamp)
            changes_df = search_data.diff()
            # The range of each column inside the window does not depend on the
            # row being inspected, so it is computed once up front.
            series_ranges = {col: search_data[col].max() - search_data[col].min() for col in search_data.columns}

            # Collect ALL significant changes, then prioritize ones that explain a peak.
            all_changes = []

            for idx in changes_df.index:
                if idx > event_timestamp_actual:  # Skip changes AFTER the peak
                    continue
                for col in search_data.columns:
                    change_val = changes_df.loc[idx, col]
                    # diff() yields NaN for the first row of the window (and next
                    # to missing samples). NaN fails every `<` test, so it must be
                    # rejected explicitly: otherwise it receives a NaN score and
                    # max() keeps it whenever it is the first candidate.
                    if pd.isna(change_val):
                        continue

                    change_magnitude = abs(change_val)
                    if change_magnitude < 1e-9:  # Skip negligible changes
                        continue

                    series_range = series_ranges[col]
                    if series_range < 1e-9:  # Skip variables that don't change
                        continue

                    score = change_magnitude / series_range

                    node = self.column_to_node.get(col)
                    if not node:
                        continue

                    # Map setpoint variables to their control actions
                    # (uC_ref → uC, uQc_ref → uQc, etc.) before asking for the effect.
                    control_node = self.get_control_action_base_code(node)
                    if not control_node:
                        control_node = node

                    effect = self.get_effect_direction(control_node, primary_node)

                    # Categorize: would this change INCREASE or DECREASE the target?
                    causes_increase = False
                    if effect == 'increase' and change_val > 0:  # Heating increased
                        causes_increase = True
                    elif effect == 'decrease' and change_val < 0:  # Cooling decreased
                        causes_increase = True

                    # Debug for top candidates
                    if score > 0.01:  # Only show significant ones
                        print(f"  DEBUG - Candidate: {node} → {control_node}, effect={effect}, change={change_val:.6f}, causes_increase={causes_increase}, score={score:.4f}")

                    all_changes.append({
                        'col': col,
                        'idx': idx,
                        'score': score,
                        'node': node,
                        'causes_increase': causes_increase,
                        'change_val': change_val,
                        'effect': effect
                    })

            # Prioritize changes that would cause an INCREASE of the target (for a
            # peak), but fall back to any significant change if none is found.
            peak_causing_changes = [c for c in all_changes if c['causes_increase']]

            print(f"DEBUG - V10 Causal Trend: Found {len(all_changes)} total changes, {len(peak_causing_changes)} would cause peak")

            if peak_causing_changes:
                best_change = max(peak_causing_changes, key=lambda x: x['score'])
                print("DEBUG - V10 Causal Trend: Selected peak-causing change")
            elif all_changes:
                # No change would cause a rise; pick the most significant anyway and
                # let the explanation logic handle the apparent contradiction.
                best_change = max(all_changes, key=lambda x: x['score'])
                print("DEBUG - V10 Causal Trend: WARNING - Most significant change would NOT cause a peak!")
                print(f"  Best change: {best_change['node']}, effect={best_change.get('effect')}, change={best_change['change_val']:.6f}, causes_increase={best_change['causes_increase']}")
            else:
                best_change = None

            if best_change is None:
                return {'error': "Could not find a significant control change leading up to the event."}

            most_significant_driver_col = best_change['col']
            most_significant_timestamp = best_change['idx']
            most_significant_driver_node = self.column_to_node.get(most_significant_driver_col)

            if not most_significant_driver_node:
                return {'error': "Could not map the most significant change back to a control node."}

            # The selected row has a non-NaN diff, so it is never the first row of
            # the window and a preceding sample exists for `event_idx - 1`.
            event_idx = self.data.index.get_loc(most_significant_timestamp)
            event = {
                'driver_node': most_significant_driver_node,
                'driver_name': self.node_to_name.get(most_significant_driver_node),
                'value_before': self.data.iloc[event_idx - 1][most_significant_driver_col],
                'value_after': self.data.iloc[event_idx][most_significant_driver_col],
                'timestamp': most_significant_timestamp,
                'peak_timestamp': event_timestamp_actual
            }

            # Debug output
            print("DEBUG - V10 Causal Trend: Found most significant change:")
            print(f"  Driver: {event['driver_name']} ({most_significant_driver_node})")
            print(f"  Timestamp of change: {most_significant_timestamp}")
            print(f"  Value before: {event['value_before']:.6f}")
            print(f"  Value after: {event['value_after']:.6f}")
            print(f"  Change direction: {'increased' if event['value_after'] > event['value_before'] else 'decreased'}")
            print(f"  Peak timestamp: {event_timestamp_actual}")

        except (KeyError, IndexError, ValueError) as e:
            return {'error': f"Could not find a significant causal event at the specified time. Error: {e}"}

        # --- 3. Build the explanation ---

        # --- Simplified but Robust On/Off Detection ---
        direction = "increased" if event['value_after'] > event['value_before'] else "decreased"
        # Two thresholds so a value must clearly cross from "on" to "off"
        # (or back) before it is described as a switch rather than a change.
        OFF_THRESHOLD = 1e-6
        ON_THRESHOLD = 1e-5

        if event['value_before'] > ON_THRESHOLD and event['value_after'] < OFF_THRESHOLD:
            change_type = "turned off"
        elif event['value_before'] < OFF_THRESHOLD and event['value_after'] > ON_THRESHOLD:
            change_type = "turned on"
        else:
            change_type = f"{direction} sharply"

        # --- Mechanism explanation ---
        # Combine the direction of the change with the driver's effect on the
        # target: less cooling or more heating raises it, the opposite lowers it.
        effect_on_target = self.get_effect_direction(event['driver_node'], primary_node)
        mechanism = ""

        if direction == 'decreased':
            if effect_on_target == 'decrease':  # e.g., DECREASING a COOLING system
                mechanism = f"This action **removed a significant cooling influence** on the greenhouse climate. With the {event['driver_name']} no longer actively reducing the temperature, the remaining natural heat sources caused the temperature to rise to its peak."
            elif effect_on_target == 'increase':  # e.g., DECREASING a HEATING system
                mechanism = f"This action **removed a significant heating influence** on the greenhouse climate. With the {event['driver_name']} no longer actively adding heat, the natural cooling processes became dominant, causing the temperature to fall."

        elif direction == 'increased':
            if effect_on_target == 'decrease':  # e.g., INCREASING a COOLING system
                mechanism = f"This action **introduced a new cooling influence** into the greenhouse climate. The activation of the {event['driver_name']} began to actively reduce the temperature, counteracting the natural heat sources."
            elif effect_on_target == 'increase':  # e.g., INCREASING a HEATING system
                mechanism = f"This action **introduced a new heating influence** into the greenhouse climate. The activation of the {event['driver_name']} began to actively raise the temperature."

        if not mechanism:  # Fallback when the effect direction is unknown
            mechanism = f"This change in the {event['driver_name']} system altered the balance of forces affecting the {self.node_to_name.get(primary_node)}."

        return {
            "primary_cause": {
                "name": event['driver_name'],
                "description": f"The **{event['driver_name']}** system **{change_type}** around {event['timestamp'].strftime('%H:%M')}, {self.format_control_change(event['value_before'], event['value_after'])}."
            },
            "mechanism_explanation": mechanism
        }

    def format_causal_trend_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Turn a causal-trend analysis result into the text block handed to the LLM.

        An 'error' entry short-circuits to a one-line error message; otherwise
        the primary cause description and the mechanism explanation are laid
        out under a fixed header.
        """
        if 'error' in analysis_result:
            return "Error during analysis: {}".format(analysis_result['error'])

        cause_description = analysis_result['primary_cause']['description']
        mechanism = analysis_result['mechanism_explanation']

        sections = (
            "--- Direct Causal Analysis ---",
            f"\n**Primary Causal Event:** {cause_description}",
            f"\n**Physical Mechanism:** {mechanism}",
        )
        return "\n".join(sections)

    def format_system_event_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Turn a system-event analysis result into the text block handed to the LLM.

        An 'error' entry short-circuits to a one-line error message; otherwise
        the event type, trigger and explanation are followed by one bullet per
        entry of the optional 'evidence' mapping.
        """
        if 'error' in analysis_result:
            return f"Error analyzing system event: {analysis_result['error']}"

        event_type = analysis_result['event_type']
        trigger = analysis_result['trigger']
        explanation = analysis_result['explanation']

        lines = [
            f"--- Analysis of System-Wide Event: {event_type} ---",
            f"\n**Primary Trigger:** {trigger}",
            f"\n**Explanation:** {explanation}",
            "\n**Supporting Evidence:**",
        ]
        evidence = analysis_result.get('evidence', {})
        lines.extend(f"- **{label}:** {detail}" for label, detail in evidence.items())

        return "\n".join(lines)

    def format_control_action_context_for_llm(self, analysis_results: Dict[str, Any]) -> str:
        """
        Turn a control-action analysis result into the text block handed to the LLM.

        All fields are read from the top level of `analysis_results`. The output
        has three numbered parts: the primary reason, the supporting evidence
        (omitted when there is none) and the observed effects. An 'error' entry
        short-circuits to a one-line error message.
        """
        if 'error' in analysis_results:
            return f"Error: {analysis_results['error']}"

        lookup = analysis_results.get

        # --- Header fields, each with a placeholder when the key is absent ---
        action_name = lookup('action_name', 'The control action')
        when = lookup('timestamp')
        if isinstance(when, pd.Timestamp):
            when_text = when.strftime('%H:%M on %Y-%m-%d')
        else:
            # Anything that is not a pandas Timestamp is reported as unknown.
            when_text = 'N/A'
        conclusion = lookup('primary_reason', "Analysis could not determine a specific trigger.")
        driver_type = lookup('causal_driver_type', 'Unknown')

        lines = [
            f"--- Causal Analysis for '{action_name}' action at {when_text} ---",
            "\n**1. Primary Reason:**",
            f"- **Conclusion:** {conclusion}",
            f"- **Causal Driver Type:** {driver_type}",
        ]

        # --- Supporting evidence: section appears only when there is any ---
        evidence = lookup('supporting_evidence', {})
        if evidence:
            lines.append("\n**2. Supporting Evidence & Context:**")
            lines.extend(f"- **{label}:** {detail}" for label, detail in evidence.items())

        # --- Observed effects: always present, with a placeholder line ---
        lines.append("\n**3. Observed Effects (What happened next):**")
        summary = lookup('observed_effects', {}).get('summary')
        if summary:
            lines.append(f"- {summary}")
        else:
            lines.append("- No significant effects could be determined.")

        return "\n".join(lines)

    
    def format_context_for_llm_query(self, context: Dict[str, Any]) -> str:
        """
        Formats the full context from analyze_context_at_timestamp into a clear,
        structured, and human-readable block of text for an LLM query.

        Sections are appended in a fixed order and each is omitted when its data
        is absent: headline value, constraint status, active fluxes, local
        correlations, PCMCI causal links, optimizer interpretation, and the
        first three incoming knowledge-graph edges.

        Args:
            context: Analysis dictionary. An 'error' key short-circuits to an
                error message. Section keys may be missing or explicitly None;
                both are treated as "no data" instead of raising.

        Returns:
            Newline-joined markdown-style text.
        """
        if 'error' in context:
            return f"Error during context analysis: {context['error']}"

        # --- 1. Extract Core Information Safely ---
        data_info = context.get('timestamp_data') or {}
        ts_obj = data_info.get('timestamp')
        ts = ts_obj.strftime('%H:%M on %Y-%m-%d') if isinstance(ts_obj, pd.Timestamp) else 'N/A'

        query_term = data_info.get('target_variable_query', 'N/A')
        kg_node = data_info.get('kg_node', 'N/A')
        value = data_info.get('value')
        friendly_name = self.node_to_name.get(kg_node, query_term)
        unit = (self.constraint_limits.get(kg_node) or {}).get('unit', '')

        # numpy scalars such as int64/float32 are not int/float subclasses, so
        # they are listed explicitly to keep the rounding and the unit.
        is_numeric = isinstance(value, (int, float, np.integer, np.floating))
        value_str = f"{value:.2f} {unit}".strip() if is_numeric else str(value)

        # --- 2. Build the Context String ---
        llm_context = [
            f"--- Contextual Analysis for '{friendly_name}' at {ts} ---",
            f"The recorded value for **{friendly_name}** was **{value_str}**."
        ]

        # --- Constraint Status ---
        constraints_info_dict = context.get('constraints') or {}
        status = constraints_info_dict.get('status')
        if constraints_info_dict and status != 'No constraints defined':
            llm_context.append("\n**Constraint Status:**")
            # A missing status no longer raises KeyError; the line is skipped.
            if status is not None:
                llm_context.append(f"- **Overall:** {status}.")
            for detail in constraints_info_dict.get('details') or []:
                llm_context.append(f"- **Detail:** {detail}")

        # --- Associated Fluxes ---
        flux_values_dict = context.get('flux_values')
        if flux_values_dict:
            llm_context.append(f"\n**Active Fluxes Influencing {friendly_name}:**")
            for flux_node, flux_info in flux_values_dict.items():
                flux_name = flux_info.get('name', flux_node)
                flux_val = flux_info.get('value')
                if pd.notna(flux_val):
                    llm_context.append(f"- **{flux_name}**: {flux_val:.3f}")

        # --- Local Correlations ---
        correlations_dict = context.get('correlations') or {}
        if 'top_correlations' in correlations_dict:
            llm_context.append("\n**Local Correlations (in the surrounding hour):**")
            top_corrs = correlations_dict['top_correlations'] or {}
            # NaN coefficients carry no strength or direction, so they are
            # dropped rather than being labelled "weak negatively correlated".
            usable_corrs = {
                name: corr_val for name, corr_val in top_corrs.items() if pd.notna(corr_val)
            }
            if usable_corrs:
                for name, corr_val in usable_corrs.items():
                    strength = "strong" if abs(corr_val) > 0.7 else "moderate" if abs(corr_val) > 0.4 else "weak"
                    direction = "positively" if corr_val > 0 else "negatively"
                    llm_context.append(f"- It was **{strength} {direction} correlated** with **{name}** (Correlation: {corr_val:.2f}).")
            else:
                llm_context.append("- No other variables were significantly correlated in this specific time window.")
        elif correlations_dict.get('summary'):
            # Fallback to the summary if 'top_correlations' key is missing
            llm_context.append(f"\n**Local Correlations:** {correlations_dict['summary']}")

        # --- Causal Links (PCMCI) ---
        causal_info = (context.get('causal_effects') or {}).get('pcmci')
        if causal_info:
            llm_context.append("\n**Potential Causal Links (from historical data analysis):**")
            llm_context.append(f"- {causal_info}")

        # --- Optimizer State Interpretation ---
        optimizer_interp_dict = context.get('optimizer_interpretation')
        if optimizer_interp_dict:
            # Filter out stable interpretations before adding the header
            active_interpretations = {
                var: interp for var, interp in optimizer_interp_dict.items() if "Stable" not in interp
            }
            if active_interpretations:
                llm_context.append("\n**Optimizer State Interpretation (The Controller's Goal):**")
                for var, interp in active_interpretations.items():
                    llm_context.append(f"- **{self.node_to_name.get(var, var)}:** {interp}")

        # --- Knowledge Graph Relationships ---
        kg_rels = context.get('kg_relationships') or {}
        incoming = kg_rels.get('incoming_edges') or []
        if incoming:
            llm_context.append(f"\n**Factors that typically INFLUENCE '{friendly_name}' (from Knowledge Graph):**")
            for edge in incoming[:3]:  # Show the first 3 for brevity
                source_name = edge.get('source_name', '?')
                description = edge.get('description', 'related')
                llm_context.append(f"- **{source_name}**: {description}")

        return "\n".join(llm_context)

    def format_range_context_for_llm(self, context: Dict) -> str:
        """Formats the time range analysis context for the LLM.

        Args:
            context: Dictionary holding a 'time_range_analysis' entry with
                'target_variable_query', 'start_time', 'end_time' and a
                'summary_stats' mapping (start/end/change/change_percent/
                mean/min/max values plus 'min_time' and 'max_time').

        Returns:
            A multi-line summary prompt, or "No analysis data to format." when
            the analysis or its statistics are empty. Any individual statistic
            or time that is missing or not formattable is rendered as "N/A"
            instead of raising.
        """
        analysis = context.get('time_range_analysis', {})
        stats = analysis.get('summary_stats', {})

        if not analysis or not stats:
            return "No analysis data to format."

        def fmt_num(value: Any) -> str:
            # Missing (None) or non-numeric statistics must not abort the prompt.
            try:
                return f"{value:.2f}"
            except (TypeError, ValueError):
                return "N/A"

        def fmt_ts(value: Any, pattern: str) -> str:
            # Accept anything exposing strftime; fall back when it is absent.
            try:
                return value.strftime(pattern)
            except (AttributeError, ValueError):
                return "N/A"

        query_term = analysis.get('target_variable_query')
        start_obj = analysis.get('start_time')
        start_time = fmt_ts(start_obj, '%H:%M')
        end_time = fmt_ts(analysis.get('end_time'), '%H:%M')
        day = fmt_ts(start_obj, '%Y-%m-%d')
        min_at = fmt_ts(stats.get('min_time'), '%H:%M')
        max_at = fmt_ts(stats.get('max_time'), '%H:%M')

        lines = [
            f"Analysis of '{query_term}' between {start_time} and {end_time} on {day}:",
            f"- Start Value ({start_time}): {fmt_num(stats.get('start_value'))}",
            f"- End Value ({end_time}): {fmt_num(stats.get('end_value'))}",
            f"- Overall Change: {fmt_num(stats.get('change'))} ({fmt_num(stats.get('change_percent'))}%)",
            f"- Average Value: {fmt_num(stats.get('mean_value'))}",
            f"- Minimum Value: {fmt_num(stats.get('min_value'))} at {min_at}",
            f"- Maximum Value: {fmt_num(stats.get('max_value'))} at {max_at}",
            "",
            f"Based on this data, provide a concise summary of what happened to {query_term} during this period.",
        ]
        # The eight-space separator reproduces the layout of the indented
        # triple-quoted template this text was historically built from.
        return "\n        ".join(lines).strip()

    def format_profitability_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Turns a profitability analysis result into a titled, markdown-style
        text block for the LLM.

        An 'error' key yields a single error line. Otherwise the output holds
        the conclusion and explanation (each defaulting to 'Not available.')
        and, when 'evidence' is non-empty, one labeled bullet per entry.
        """
        if 'error' in analysis_result:
            return f"Error during profitability analysis: {analysis_result['error']}"

        fallback = 'Not available.'
        parts = ["--- Profitability Analysis: Cooling vs. Growth ---"]
        parts.append(f"\n**Conclusion:** {analysis_result.get('conclusion', fallback)}")
        parts.append(f"\n**Explanation:** {analysis_result.get('explanation', fallback)}")

        # The evidence section only appears when there is something to list.
        proof = analysis_result.get('evidence', {})
        if proof:
            parts.append("\n**Supporting Evidence:**")
            parts += [f"- **{label}:** {detail}" for label, detail in proof.items()]

        return "\n".join(parts)

    def format_control_efficiency_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Builds the LLM-facing text for a control efficiency analysis.

        Returns an error line when the result contains 'error'. Otherwise
        returns a title, the conclusion and explanation (with 'Not available.'
        as the fallback for either), and a bulleted evidence list when the
        'evidence' mapping is non-empty.
        """
        if 'error' in analysis_result:
            return f"Error during efficiency analysis: {analysis_result['error']}"

        conclusion = analysis_result.get('conclusion', 'Not available.')
        explanation = analysis_result.get('explanation', 'Not available.')
        sections = [
            "--- Control Efficiency Analysis ---",
            f"\n**Conclusion:** {conclusion}",
            f"\n**Explanation:** {explanation}",
        ]

        facts = analysis_result.get('evidence', {})
        if not facts:
            return "\n".join(sections)

        # One labeled bullet per evidence item, in mapping order.
        sections.append("\n**Supporting Evidence:**")
        sections.extend(f"- **{name}:** {fact}" for name, fact in facts.items())
        return "\n".join(sections)

    def format_obstacle_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """Formats a physical-obstacle analysis into a string for the LLM.

        An 'error' key yields a single error line. Otherwise 'obstacle_name'
        and 'explanation' are required, and a truthy 'evidence' mapping adds a
        bulleted "Supporting Evidence" section.
        """
        if 'error' in analysis_result:
            return f"Error during analysis: {analysis_result['error']}"

        lines = [
            f"--- Analysis of Physical Obstacle: {analysis_result['obstacle_name']} ---",
            f"\n**Explanation:** {analysis_result['explanation']}",
        ]

        if analysis_result.get('evidence'):
            lines.append("\n**Supporting Evidence:**")
            lines.extend(
                f"- **{label}:** {detail}"
                for label, detail in analysis_result['evidence'].items()
            )

        return "\n".join(lines)
    

    def format_daily_total_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """Render the daily total analysis as text for the LLM.

        Emits a title for the analysed 'date', one line per entry in
        'variables' (its own error message, or its displayed total), and a
        closing comparison line when 'comparison' is truthy. An 'error' key on
        the result itself yields a single error line.
        """
        if 'error' in analysis_result:
            return f"Error during analysis: {analysis_result['error']}"

        def describe(node, data):
            # Per-variable failures are reported inline instead of aborting.
            if 'error' in data:
                return f"- {node}: {data['error']}"
            shown = data.get('total_MJ_per_m2_display', 'N/A')
            return f"- Total Contribution from '{data['name']}': {shown} MJ/m²"

        lines = [f"--- Daily Totals and Comparison Analysis for {analysis_result['date']} ---"]
        lines.extend(
            describe(node, data)
            for node, data in analysis_result.get('variables', {}).items()
        )

        verdict = analysis_result.get('comparison')
        if verdict:
            lines.append(f"\n**Comparison Result:** {verdict}")

        return "\n".join(lines)

    def _derive_hca_reference_timestamp(self, resolved_info: Dict[str, Any]) -> Optional[datetime]:
        """Pick one datetime on the resolved date to anchor HCA evidence.

        Resolution order: an explicit 'time'; the midpoint of
        'start_time'..'end_time' (an end at or before the start is treated as
        falling on the next day); whichever single bound is present; and
        finally noon. Returns None when no 'date' is available.
        """
        day = resolved_info.get('date')
        if not day:
            return None

        def on_day(clock):
            return datetime.combine(day, clock)

        exact = resolved_info.get('time')
        if exact:
            return on_day(exact)

        begin = resolved_info.get('start_time')
        finish = resolved_info.get('end_time')

        if begin and finish:
            window_start = on_day(begin)
            window_end = on_day(finish)
            # A window that does not move forward is read as wrapping past midnight.
            if window_end <= window_start:
                window_end = window_end + timedelta(days=1)
            return window_start + (window_end - window_start) / 2

        lone_bound = begin or finish
        if lone_bound:
            return on_day(lone_bound)

        return on_day(time(12, 0))

    def _build_hca_appendix(self,
                            resolved_info: Dict[str, Any],
                            focus_node: Optional[str] = None,
                            focus_timestamp: Optional[datetime] = None,
                            focus_query_term: Optional[str] = None) -> str:
        """
        Produces the reusable HCA appendix text for the node/time under
        discussion, or an empty string when no appendix applies.

        An empty string is returned when the ablation mode is TEMPLATE_ONLY,
        when no HCA component is active, when no node or anchor timestamp can
        be resolved, or when the context analysis raises or reports an error.
        Explicit ``focus_*`` arguments take precedence over ``resolved_info``.
        """
        mode = getattr(self, 'ablation_mode', AblationMode.FULL_HCA)
        if mode == AblationMode.TEMPLATE_ONLY:
            return ""

        if not any(self._get_active_hca_components().values()):
            return ""

        # Node resolution: explicit focus, then the primary node, then the
        # first entry of the target node list.
        node = focus_node or resolved_info.get('primary_node')
        if not node:
            candidates = resolved_info.get('target_nodes') or []
            if not candidates:
                return ""
            node = candidates[0]
            if not node:
                return ""

        anchor_dt = focus_timestamp or self._derive_hca_reference_timestamp(resolved_info)
        if not anchor_dt:
            return ""

        query_term = (
            focus_query_term
            or resolved_info.get('query_term')
            or self.node_to_name.get(node, node)
        )

        # Best effort: any failure in the analysis drops the appendix instead
        # of propagating to the caller.
        try:
            day_text = anchor_dt.strftime('%Y-%m-%d')
            clock_text = anchor_dt.strftime('%H:%M')
            context = self.analyze_context_at_timestamp(node, query_term, day_text, clock_text)
        except Exception as exc:
            print(f"DEBUG - HCA appendix failed for node {node}: {exc}")
            return ""

        if isinstance(context, dict) and context.get('error'):
            print(f"DEBUG - HCA appendix unavailable: {context['error']}")
            return ""

        return self.format_context_for_llm_query(context)

    def respond_with_llm(self,
                         context: str,
                         original_query: str,
                         intent: str,
                         resolved_info: Dict[str, Any],
                         attach_hca: bool = True,
                         hca_focus_node: Optional[str] = None,
                         hca_focus_timestamp: Optional[datetime] = None,
                         hca_focus_query_term: Optional[str] = None) -> str:
        """
        Single entry point for LLM calls made by routed intents.

        When ``attach_hca`` is true, an HCA appendix is built for the focus
        node/time and, if non-empty, appended to ``context`` under a
        "Physics & Causality Evidence (HCA Layer)" header. The resulting text
        is passed to ``send_context_to_llm`` and its response is returned.
        """
        if not attach_hca:
            return self.send_context_to_llm(context, original_query, intent)

        appendix = self._build_hca_appendix(
            resolved_info,
            focus_node=hca_focus_node,
            focus_timestamp=hca_focus_timestamp,
            focus_query_term=hca_focus_query_term,
        )

        # An empty appendix leaves the caller's context untouched.
        payload = context
        if appendix:
            payload = f"{context}\n\n--- Physics & Causality Evidence (HCA Layer) ---\n{appendix}"

        return self.send_context_to_llm(payload, original_query, intent)

    def send_context_to_llm(self, context: str, original_query: str, intent: str) -> str:
        """
        Selects a tailored prompt based on the query's intent and sends the
        context to the LLM for a structured, high-quality response.
        """
        print(f"\n--- Sending to LLM for Intent: {intent} ---")
        print("Original Query:", original_query)
        
    
        prompts = {
            'timestamp': f"""
                You are an expert greenhouse analyst. Your task is to provide a concise yet insightful summary about a specific data point, answering the user's query: "{original_query}"

                Here is the detailed context for the requested timestamp:
                {context}

                **Instructions for your response:**
                1.  **Direct Answer:** Start by directly stating the value of the variable at the specified time, including its unit (e.g., "At 10:00, the Temperature was 18.57°C.").
                2.  **Constraint Context:** Briefly mention its status relative to its limits (e.g., "...which was stable and within its optimal range."). Use the "Constraint Status" section for this.
                3.  **Explain the Dynamics (The "Why"):**
                    -   Look at the "Active Fluxes" section. These are the physical processes happening *at that moment*.
                    -   Describe the main forces pushing the value up (positive fluxes) and down (negative fluxes).
                    -   **Use plain language.** Instead of "Solar Heat Flow", say "heat from the sun". Instead of "Transpiration Heat Flow", say "cooling from plant transpiration".
                4.  **Add Predictive Insight (Optional):**
                    -   Look at the "Local Correlations" and "Potential Causal Links".
                    -   If there is a strong correlation or a clear causal link (especially with a disturbance like solar radiation or outside temperature), mention it briefly to add context. For example: "This was happening while the outside temperature was also rising, which are closely linked." or "Historically, we see that solar radiation at this level leads to a temperature increase a few hours later."
                5.  **Be Concise:** Combine these points into a smooth, easy-to-read paragraph. Do not just list the data. Synthesize it.

                    Answer:
                    """,
            'explain_profitability': f"""
                    You are an expert greenhouse manager and economist, explaining a complex but profitable control strategy to a manager concerned about costs.

                    User's Query: "{original_query}"

                    Here is the deep-dive analysis of the strategy:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Conclusion First:** Begin with the "Conclusion" directly (e.g., "Yes, this is a highly profitable strategy.").
                    2.  **Address the Misconception:** Use the "Explanation" to directly address the user's concern that the systems are "fighting each other." Clarify that they are performing independent, necessary tasks in response to the same opportunity.
                    3.  **Tell the Causal Story:** Weave the "Supporting Evidence" into a clear narrative.
                        -   Start with "The Opportunity" (intense sun).
                        -   Explain "Necessary Task 1" (cooling to prevent damage).
                        -   Explain "Necessary Task 2" (CO₂ to fuel growth).
                    4.  **Provide the Proof:** Use the "Proof of Profitability" evidence (the biomass gain) as the definitive data point that proves the strategy worked and was financially sound.
                    5.  **Synthesize:** Combine these points into a confident, easy-to-understand paragraph that reassures the manager of the controller's intelligence.

                    Answer:
                    """,

            'analyze_daily_total': f"""
                    You are an expert greenhouse analyst explaining the strategic implications of different energy fluxes to an operator. Your goal is to provide a clear, insightful answer to the user's query: "{original_query}"

                    Here is the quantitative analysis of the daily totals:
                    {context}

                    **Instructions for your response:**
                    1.  **Frame the Narrative:** Your explanation MUST revolve around the strategic concepts of **"free, natural" processes versus "active, energy-consuming" system actions**. Use these exact phrases.

                    2.  **Quantify the Contributions:**
                        -   Clearly state the total contribution of the natural process (e.g., 'free' cooling from plant transpiration), providing its calculated value in MJ/m².
                        -   Clearly state the total contribution of the active system (e.g., active cooling), providing its calculated value in MJ/m².
                        -   If a value is zero or very small (e.g., 0.00), describe it as "negligible" or state that the system "was not used".

                    3.  **State the Comparison Conclusion:** Use the "Comparison Result" from the context to state which was more significant and by how much. Use the human-friendly text provided (e.g., "overwhelmingly larger").

                    4.  **Provide the Strategic Interpretation (Most Important Part):**
                        -   Explain **WHY** this imbalance is intentional and efficient.
                        -   Describe the natural process as the **"primary tool"** or the one that handles the **"bulk heat load"**.
                        -   Describe the active system as a **"precise trimming tool"** or a **"safety mechanism"** that is used *only when necessary* to keep the climate within its critical limits.
                        -   Conclude by explaining that this strategy is highly efficient, minimizes energy costs, and is a sign of a well-designed system.

                    5.  **Synthesize:** Weave all these points into a concise, confident, and easy-to-understand narrative. Do not just list the data; explain its meaning.

                    Answer:
                    """,
            'pattern': f"""
                    You are an analyst explaining a data pattern over a period of time. Your task is to answer the user's query: "{original_query}"

                    Here is the detailed analysis of the pattern:
                    {context}

                    **Instructions:**
                    1.  **Summarize the Overall Pattern:** Start by describing the main trend (e.g., "The temperature steadily increased throughout the morning," or "There was a sharp, unexpected drop in humidity.").
                    2.  **Explain the Root Causes:**
                        - Look at the "Flux Analysis" section. These are the factors causing the change.
                        - For each significant flux, explain its contribution in simple terms. **DO NOT use technical flux names (e.g., H_ref, Q_sun)**. Instead, say "Heating from the system contributed..." or "Sunlight provided a significant amount of heat..."
                    3.  **Reference Key Data:** Mention the start/end values and any min/max points to support your explanation.
                    4.  **Be Strict with Data:** Stick strictly to the values and factors provided in the context. Do not invent reasons or offer general advice.

                    Answer:
                    """,
            'evaluate_trade_off': f"""
                    You are an expert greenhouse analyst, evaluating the quality of a strategic decision made by an advanced controller.

                    User's Query: "{original_query}"

                    Here is the deep-dive analysis of the controller's strategy:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Conclusion First:** Begin by directly stating the main conclusion (e.g., "Yes, this was an acceptable and deliberate trade-off...").
                    2.  **Explain the Primary Goal:** Use the "Controller's Primary Goal" evidence to explain that the main objective was maximizing biomass. Emphasize that a negative Biomass Lagrangian is the proof of this goal.
                    3.  **Explain the Enabling Conditions:** Use the "Favorable Conditions" evidence to explain *why* this strategy was viable (e.g., high sunlight).
                    4.  **Explain the "Cost" or Trade-Off:** Use the "The Necessary 'Cost'" evidence to explain that hitting the CO₂ limit was a *consequence* of aggressively pursuing the biomass goal, not a mistake. Explain that this is a sign of a well-tuned system operating at its peak.
                    5.  **Synthesize:** Weave these points into a clear, confident, and insightful paragraph that explains the controller's sophisticated, profit-driven behavior.

                    Answer:
                    """,
            'analyze_disease_risk': f"""
                    You are a senior agronomist and greenhouse manager, reassuring a concerned grower about disease risk. Your tone should be expert, confident, and educational.

                    User's Query: "{original_query}"

                    Here is the strategic analysis of both the daytime risk and the controller's nighttime response:
                    {{context}}

                    **Instructions for your response:**

                    1.  **Acknowledge and Validate:** Start by acknowledging the user's concern. State that they are right to be observant, as the high daytime humidity is indeed a risk factor. Use the "Daytime Risk Factor" evidence.

                    2.  **Pivot to the Solution (The "However"):** Immediately pivot to the controller's response. This is the most important part. Use a transition like "However, the controller correctly identified this risk and executed a textbook nighttime disease prevention strategy."

                    3.  **Explain the Mitigation Strategy:**
                        -   Describe the **"Controller Action"**: Explain that the controller forcefully lowered the humidity setpoint after sunset.
                        -   Explain the **"Physical Mechanism"**: Describe how this was achieved (e.g., "This was done efficiently using passive condensation...").

                    4.  **State the Final Verdict:** Conclude by explaining that this proactive measure is a sign of a well-functioning, intelligent system. Reassure the user that while the daytime conditions created a risk, the system's automated nighttime strategy effectively mitigated it, ensuring long-term crop health.

                    5.  **Synthesize:** Weave all these points into a single, cohesive, and easy-to-understand paragraph. Do not just list the data; tell the story of "Problem Detected -> Solution Executed."
                    """,
            'explain_trade_off': f"""
                    You are an expert control systems analyst and greenhouse physicist, explaining a sophisticated, multi-objective control decision to an operator. Your answer must be insightful and clear.

                    User's Query: "{original_query}"

                    Here is the deep-dive analysis comparing the controller's effort on two different climate goals:
                    {context}

                    **Instructions for your response:**

                    1.  **State the Definitive Answer First:** Begin by directly and confidently stating which objective was the primary purpose of the control action.

                    2.  **Provide the "Smoking Gun" Evidence:** Explain that the definitive proof lies in the optimizer's internal state (the Lagrangian multipliers).
                        -   Use the provided high Lagrangian value to prove which constraint the controller was actively fighting (e.g., "the temperature Lagrangian was strongly positive, proving the system was fighting to keep the temperature from exceeding its maximum limit.").
                        -   Use the provided near-zero Lagrangian value to prove that the other objective was not a critical concern at that moment.

                    3.  **Explain the Strategic Narrative (The MOST Important Part):**
                        -   Acknowledge that the physical action (in this case, active cooling) has **multiple effects** (it reduces both temperature and humidity).
                        -   Frame the less critical effect as a **"beneficial side-effect"** or a **"free service"**.
                        -   Synthesize these points into a narrative that highlights the controller's efficiency. Explain that it solved the most critical problem (e.g., preventing overheating) and received the other benefit (dehumidification) as a valuable bonus.

                    4.  **Be Concise and Authoritative:** Weave these points into a single, cohesive, and easy-to-understand paragraph. Do not just list the data; explain its strategic meaning.

                    Answer:
                    """,     
            'find_and_explain_reaction': f"""
                    You are an expert greenhouse analyst explaining how a smart controller reacts to a sudden environmental change. Your task is to provide a clear, causal explanation in response to the user's query: "{original_query}"

                    Here is the analysis of the event and the system's reaction:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Event:** Begin by clearly stating the event that was found. Use the "Event Description" from the context, including the time and the specific values.
                    2.  **Structure the Reactions:** Create a separate, clear section for each of the reacting systems (e.g., "CO₂ Injection Reaction", "Active Cooling Reaction").
                    3.  **For Each Reaction, Explain "What" and "Why":**
                        -   **What Happened:** Describe the reaction quantitatively. State how the variable's value changed (e.g., "The CO₂ injection setpoint was immediately cut from ~X to ~Y."). If a value is zero or very small (e.g., 0.00), describe it as "negligible" or state that the system "was not used".
                        -   **Why It Happened:** Provide the strategic reason for the reaction. Use your built-in knowledge and the context to explain the logic (e.g., "This is a resource-saving strategy because with less light, the plants cannot use the CO₂..."). Connect the reaction back to the triggering event.
                    4.  **Conclude with a Synthesis:** End with a concluding paragraph that summarizes the controller's overall intelligence, highlighting its ability to react predictively to the root cause (the disturbance) to save resources and energy.

                    Answer:
                    """,    
            'explain_obstacle': f"""
                    You are a physics and engineering expert explaining a complex dynamic in a greenhouse to an operator. Your answer should be clear, intuitive, and use analogies. Your task is to answer the user's query: "{original_query}"

                    Here is the deep physical analysis of the situation:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Obstacle:** Start by naming the primary obstacle using the title provided (e.g., "The 'Wet Sponge' Effect of Warm Air").
                    2.  **Explain with an Analogy:** Use the provided explanation, but feel free to enhance it with analogies (like the 'wet sponge' concept) to make it easy to understand.
                    3.  **Break Down the Physics:** Clearly explain the step-by-step physical processes at play. Use the "Supporting Evidence" to provide concrete data points.
                    4.  **Describe the Solution:** Explain what physical process the controller was relying on to eventually overcome the obstacle (e.g., "slow, passive condensation").
                    5.  **Synthesize:** Conclude by summarizing why the observed lag is a normal and expected outcome of the underlying physics, not a system failure.

                    Answer:
                    """,
            'summarize_day_strategy': f"""
                    You are an expert agronomist and greenhouse manager explaining a complex control strategy. Your tone should be confident, reassuring, and educational.

                    User's Query: "{original_query}"

                    Here is the detailed analysis of the day's strategy, including growth performance and risk management:
                    {context}

                    **Instructions for your response:**
                    1.  **Check for "User Concern":** If the context includes a "User Concern" section, your answer MUST start by directly acknowledging it (e.g., "That's an excellent question, and your concern about botrytis is valid.").
                    2.  **Explain the Trade-Off:** Weave the provided points into a clear narrative about a calculated trade-off.
                        -   Start with the **"Growth Rationale."** Explain *why* the risky condition (e.g., high humidity) is beneficial for the plants.
                        -   Then, pivot to the solution. Use a transition like "However, the controller is programmed to manage this risk..."
                        -   Describe the **"Risk Mitigation Strategy,"** explaining both the daytime management and the critical nighttime prevention steps.
                    3.  **Synthesize, Don't List:** Do not just list the data points. Tell a cohesive story of "Goal -> Calculated Risk -> Mitigation -> Success."
                    4.  **If there is no "User Concern" section,** provide a general summary based on the "Overall Growth Performance," "Cost-Benefit Analysis," and "Controller's Overall Strategy" sections.

                    Answer:
                    """,
            'explain_causal_trend': f"""
                    You are an expert greenhouse analyst explaining a root cause. Your task is to answer the user's query: "{original_query}"

                    Here is the direct causal analysis identifying the single most important event:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Primary Cause First:** Begin by directly stating the "Primary Causal Event". This is the definitive answer.
                    2.  **Explain the "How":** Use the "Physical Mechanism" to explain how this cause led to the observed trend.
                    3.  **Be Concise and Confident:** Synthesize these two points into a short, clear, and direct causal explanation. Do not mention statistics or correlations unless they are part of the provided mechanism.

                        Answer:
                        """,

            'explain_oscillation': f"""
                        You are an expert control systems analyst interpreting complex controller behavior for a greenhouse operator. Your answer must be insightful, using clear analogies. Your task is to answer the user's query: "{original_query}"

                        Here is the deep-dive analysis of the volatility:
                        {context}

                        **Instructions for your response:**
                        1.  **Explain the Target Variable's Meaning:** Start by explaining what the noisy variable (e.g., `Ceq_lg`) represents in simple terms. Describe it as the controller's "effort" or "shadow price." Explain that a noisy signal means the controller is working extremely hard and making constant, rapid adjustments.

                        2.  **Identify the "Battle of Forces":** Use the "Contributing Physical Forces" from the context. Frame your explanation as a battle between these unpredictable forces (e.g., volatile photosynthesis vs. volatile ventilation).

                        3.  **Explain Each Force's Instability:** For each contributing factor that was found to be volatile, explain *why* it was unstable, using the "Root Cause" information. For example, "Photosynthesis was unstable because it's driven by solar radiation, which was jagged and unpredictable due to clouds."

                        4.  **Describe the Controller's Behavior:** Use an analogy. Describe the controller's performance as a "skilled pilot flying through severe turbulence." The passenger (the actual CO2 level) feels a relatively smooth ride, but the pilot's actions (the Lagrangian) are frenetic.

                        5.  **State the Final Conclusion:** Conclude by stating that the oscillation is not a sign of a faulty controller, but rather a sign of a **masterful control performance under extremely difficult conditions**.

                        Answer:
                        """,

            'explain_system_event': f"""
                        You are an expert greenhouse analyst explaining a major, coordinated change in the greenhouse's operational strategy (a 'regime change'). Your task is to answer the user's query: "{original_query}"

                        Here is the deep-dive analysis of the system-wide event:
                        {context}

                        **Instructions for your response:**
                        1.  **Identify the Event:** Start by clearly stating the name of the system-wide event (e.g., "This coordinated drop in setpoints was a planned 'Day-to-Night Regime Change.'").
                        2.  **State the Primary Trigger:** Clearly state what caused the event, using the "Primary Trigger" from the analysis.
                        3.  **Explain the Rationale:** Synthesize the "Explanation" and "Economic Rationale" from the analysis into a clear, easy-to-understand paragraph. Explain *why* this change in strategy makes sense from an operational and economic perspective.
                        4.  **Use the Evidence:** Reference the specific "Trigger Data" to support your explanation.
                        5.  **Be Definitive:** Present the information as a confident analysis, not a guess. The system has already determined the cause.

                        Answer:
                        """,
            'explain_gradual_trend': f"""
                        You are a greenhouse physicist explaining the root cause of a slow, steady climate trend.

                        User's Query: "{original_query}"

                        Here is the analysis of the dominant physical forces over that period:
                        {context}

                        **Instructions for your response:**
                        1.  **Start with the "Net Effect Explanation":** Begin by directly stating that the trend was caused by heating forces being stronger than cooling forces.
                        2.  **Identify the Main Driver:** Clearly state the "Primary Heating Force" and its average contribution. Use simple language (e.g., "The main source of heat was from the sun...").
                        3.  **Identify the Counteracting Force:** State the "Primary Cooling Force" and its contribution, explaining that it was not strong enough to overcome the heating.
                        4.  **Synthesize:** Weave these points into a single, clear, and concise paragraph that explains the simple physics of why the temperature gradually increased. Do not mention the minor wiggles or "sudden events."

                        Answer:
                        """,
            'explain_sudden_event': f"""
                        You are a greenhouse analyst explaining the cause of a specific, sudden event.

                        User's Query: "{original_query}"

                        Here is the analysis of the most significant event in that time range:
                        {context}

                        **Instructions for your response:**
                        1.  **Focus ONLY on the "Most Significant Event":** Start by stating what happened and when (e.g., "The sharp drop you observed happened around 14:50...").
                        2.  **Explain the Immediate Cause:** Use the "Primary Causes at that Moment" to explain the physical forces that were dominant at that precise time.
                        3.  **Be Direct and Causal:** Create a clear, cause-and-effect explanation. For example: "This drop was caused by the cooling from plant transpiration suddenly becoming much stronger than the heat from the sun."
                        4.  **Do NOT describe the overall trend** or other minor events. Focus exclusively on explaining the single most important spike or drop.

                        Answer:
                        """,
            'explain_anomaly': f"""
                        You are an expert greenhouse analyst explaining a sophisticated, predictive control strategy to an operator. Your task is to answer the user's query: "{original_query}"

                        Here is the deep-dive analysis of the unusual event:

                        **Instructions for your response:**
                        1.  **State the Conclusion First:** Begin by directly stating the main explanation. Use the "Explanation" field from the context.
                        2.  **Present the Evidence:** Clearly present the supporting evidence in bullet points or a short, clear paragraph. Use the "Supporting Evidence" fields provided in the context.
                        3.  **Synthesize, Don't Just List:** Weave the evidence into a cohesive narrative that tells the story of what the controller did and why.
                        4.  **Emphasize the "Smart" Aspect:** Conclude by highlighting that this was not a malfunction, but a sign of an advanced, cost-optimizing controller. Use the "Conclusion" field from the context to help with this.
                        5.  **Be Definitive and Clear:** Present the information as a confident analysis.

                            Answer:
                            """, 
            'evaluate_setpoint_strategy': f"""
                        You are a senior greenhouse manager and agronomist, evaluating a specific control strategy. Your task is to provide a clear, confident, and insightful answer to the user's query: "{original_query}"

                        Here is the deep-dive analysis of the strategy:
                        {context}

                        **Instructions for your response:**

                        1.  **State the Verdict First:** Begin by directly stating the "Verdict" from the context (e.g., "Yes, the high CO₂ level was a deliberate and highly effective growth strategy.").
                        2.  **Tell the Strategic Story:** Do not just list the evidence. Weave the "Supporting Evidence" points into a cohesive narrative that explains the *why* behind the strategy.
                            -   Start with the primary goal (e.g., "The overarching goal was to maximize profit...").
                            -   Explain the conditions that made it possible (e.g., "...which was viable due to ample sunlight...").
                            -   Provide the definitive proof that the strategy was being executed (e.g., "...the proof is that the plants were consuming CO₂ at their maximum rate...").
                            -   Mention the optimizer's state as confirmation of the strategy's aggressiveness or stability.
                        3.  **Be Authoritative and Clear:** Your tone should be that of an expert explaining a complex but intentional process. Synthesize the information into a single, easy-to-understand paragraph.
                        Answer:
                        """,
            'explain_causal_anomaly': f"""
                    You are a plant physiologist and greenhouse expert, explaining a counter-intuitive biological process.

                    User's Query: "{original_query}"

                    Here is the deep-dive analysis of the event:
                    {context}

                    **Instructions:**
                    1.  **State the Conclusion First:** Start by directly answering the user's question, stating that it is *not* a sign of stress but a sign of success.
                    2.  **Explain the "Why":** Use the "Explanation" and "Evidence" to tell the causal story. Explain that high humidity makes it harder for plants to transpire.
                    3.  **Use an Analogy:** Frame the explanation in simple terms. Your drafted answer's analogy of "don't need to 'sweat' as much" is perfect.
                    4.  **Be Reassuring and Authoritative:** Your tone should be that of an expert reassuring a concerned grower that the system is behaving as expected.
                    """,
            'explain_anomaly_fault': f"""
                    You are a senior control systems engineer providing a clear, concise diagnostic report about a system fault. Your task is to explain the anomaly mentioned in the user's query: "{original_query}"

                    Here is the diagnostic report from the analysis system:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Diagnosis Clearly:** Begin by directly stating the "Diagnosis" from the context. Use definitive language (e.g., "This event was caused by a system fault," not "it might have been a fault").
                    2.  **Explain the Causal Chain:** Use the "Causal Chain of Evidence" to explain what happened in a step-by-step, cause-and-effect manner. This is the most important part.
                        -   Start with the "Root Cause" (e.g., the Lagrangian signal).
                        -   Describe the "Physical Consequence" (e.g., the heat flux).
                        -   Explain the "Impact on Climate" (e.g., the temperature spike).
                        -   Mention any "Secondary Impacts" (e.g., the biological response).
                    3.  **Use Simple Analogies:** Translate technical terms into simple analogies. For example, describe the Lagrangian spike as "the optimizer sending a maximum-effort distress signal" or the control saturation as "the gas pedal being stuck to the floor."
                    4.  **Do NOT Justify the Action:** Your tone should be diagnostic, not defensive. Explain what went wrong and why. Do not try to frame this as a clever or planned event.
                    5.  **Synthesize:** Weave these points into a clear, easy-to-understand incident report for an operator or manager.

                        Answer:
                        """,
            'explain_lagrangian_state': f"""
                    You are an expert in Model Predictive Control theory, providing a definitive explanation. Your task is to answer the user's query: "{original_query}"

                    Here is the precise analysis and interpretation of the signal in question:
                    {context}

                    **Instructions for your response:**
                    1.  **Synthesize the "Implication" text provided above into a clear, confident answer.** This text contains the complete and correct explanation.
                    2.  **Do NOT mention the concept of a signal being "flat and near-zero" unless that phrase is explicitly part of the provided "Implication" text.** Your task is to explain the specific behavior that was observed, not other general concepts.
                    3.  Your tone should be authoritative, as if you are explaining a fundamental principle from a textbook.

                    Answer:
                    """,

            'explain_control_action': f"""
                    You are an expert system analyst explaining a multi-objective automated control action.
                    Your task is to synthesize ALL the contributing factors into a single, cohesive explanation.

                    Original Query: "{original_query}"

                    Here is the comprehensive analysis of the control action:
                    {context}

                    **Instructions for your response:**
                    Your answer MUST be structured in three parts.

                    **Part 1: The Reason (Why was the action taken?)**
                    -   Start by stating the **"Primary Reason"** from the context.
                    -   Weave in the **"Supporting Evidence"** to explain the full strategic context.
                        -   **CRITICAL:** If "Side-Effect Management" is part of the evidence, you MUST include it in your explanation of the strategy's effectiveness.

                    **Part 2: The Observed Effect (What happened right after?)**
                    -   Describe the outcome for the primary goal using the "Primary Effect" from the "Observed Effects" section.
                    -   If "Side-Effect Management" is present, describe the observed effect on that variable as well.

                    **Part 3: The Typical Outcome (Is this normal?)**
                    -   Conclude by confirming if this multi-faceted action is a standard procedure for an advanced controller designed to balance multiple goals.
                    """,
            'explain_kickstart_maneuver': f"""
                    You are a senior control systems engineer explaining a sophisticated, counter-intuitive energy-saving strategy to a greenhouse operator who is concerned about waste.

                    User's Query: "{original_query}"

                    Here is the deep-dive analysis of the coordinated control action:
                    {context}

                    **Instructions for your response:**

                    1.  **Directly Address the Misconception:** Start by acknowledging the user's concern and stating that the action, while seemingly wasteful, was actually an intelligent **energy-saving maneuver**. Use these exact words.
                    2.  **State the Primary Goal:** Use "The Goal" evidence to explain that the main objective was to rapidly warm the greenhouse for the day.
                    3.  **Reveal the "Secret":** Use "The Opportunity" evidence to explain the core insight – that the controller identified the warmer outside air as a source of **"free heat."**
                    4.  **Explain the Coordinated Action:** Describe how the controller used ventilation to *import* this free heat and the mechanical heater only as a *supplement* to speed things up.
                    5.  **Summarize the Net Effect:** Conclude by stating that this coordinated action was the most cost-effective way to achieve the morning warm-up goal.
                    6.  **Synthesize:** Weave all these points into a clear, confident, and insightful narrative that directly answers the user's "why" question.
                    """,
            'explain_net_effect': f"""
                    You are an expert system analyst, explaining how two competing physical forces balance each other out and affect the overall system state.

                    User's Query: "{original_query}"

                    Here is the deep-dive analysis of the competing fluxes:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Conclusion First:** Begin by directly stating the main conclusion about the equilibrium between the forces.
                    2.  **Quantify the Competing Forces:** For each force mentioned in the "Competing Forces" evidence, state its name, its effect (heating/cooling), and its average value.
                    3.  **State the Net Effect:** Clearly state the calculated "Net Effect" value.
                    4.  **Explain the System's Response:** Explain how this net effect influenced the controller's action, using the "System Response" evidence. Emphasize that this demonstrates an efficient, cost-saving strategy.
                    5.  **Synthesize:** Weave these points into a clear and insightful narrative.

                    Answer:
                    """,    
            'analyze_model_discrepancy': """
                    You are a senior control systems engineer diagnosing the performance of an MPC controller's internal model. Your task is to provide a clear, evidence-based answer to the user's query: "{original_query}"

                    Here is the diagnostic analysis comparing the controller's plan to physical reality:
                    Finding: {finding}

                    Explanation: {explanation}

                    Evidence: {evidence}

                    **Instructions for your response:**
                    1.  **State the Finding First:** Begin with a clear and direct answer, using the "Finding:" provided above.
                    2.  **Explain the Discrepancy:** Synthesize the "Explanation:" provided above into a clear narrative.
                    3.  **Present the Evidence (The Three Pillars):** Structure your proof into three clear points, using the data from the "Evidence:" JSON block above.
                        -   The Controller's Plan
                        -   The Physical Reality
                        -   The 'Effort' Signal (Lagrangian)
                    4.  **Conclude with the "Why":** Conclude by explaining *why* this mismatch likely occurred (e.g., higher-than-expected wind).

                    Answer:
                    """,
            'analyze_volatility': f"""
                    You are a senior control systems engineer evaluating controller performance under stress. Your tone should be confident and definitive.

                    User's Query: "{original_query}"

                    Here is the deep-dive analysis of the system's volatility:
                    {context}

                    **Instructions for your response:**

                    1.  **State the Verdict First:** Begin by directly stating the "Conclusion" from the context (e.g., "The controller's performance was excellent...").
                    2.  **Identify the Root Cause:** Clearly state the "External Driver of Instability" from the evidence. Explain that the problem was external, not internal.
                    3.  **Re-frame the Controller's Actions:** Explain the "Controller's Response" from the evidence. It is CRITICAL that you frame the jagged control signal not as a problem, but as **proof of the controller's high-speed, precise work**.
                    4.  **Deliver the Final Performance Review:** Use the "Performance Verdict" to conclude that maintaining stability in the face of chaos is the mark of a well-tuned, high-performance system.
                    5.  **Synthesize:** Weave these points into a single, cohesive, and insightful paragraph.
                    Answer:
                    """,
            'find_and_analyze_event': f"""
                    You are an expert system analyst, summarizing the cause and effect of a peak event.

                    User's Query: "{original_query}"

                    Here is the analysis of the event:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Finding:** Begin by directly stating the main conclusion, including the time of the event and the peak value.
                    2.  **Evaluate Performance:** State the "Performance Evaluation" conclusion clearly.
                    3.  **Explain the Contributing Factors:** In a clear, narrative style, explain the factors from the evidence. Describe the primary heat source and how it was counteracted by natural cooling.
                    4.  **Describe the Controller's State:** Mention the "Controller State" evidence to prove that the system was working hard to manage the situation.
                    5.  **Synthesize:** Weave all points into a concise, insightful paragraph.

                    Answer:
                    """,
            'evaluate_control_strategy': f"""
                    You are an expert greenhouse analyst, evaluating the performance of a control strategy.

                    User's Query: "{original_query}"

                    Here is the deep-dive analysis of the strategy's effectiveness and side-effects:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Conclusion First:** Begin by directly stating the main conclusion about the strategy's effectiveness.
                    2.  **Explain the Primary Effect:** Use the "Effectiveness on Primary Goal" evidence to describe what the controller did and how well it achieved its main objective. Use the quantitative data.
                    3.  **Explain the Side-Effect Management:** If available, use the "Side-Effect Management" evidence to explain how the controller handled any negative consequences. Highlight the controller's sophistication in balancing multiple factors.
                    4.  **Synthesize:** Weave these points into a clear, confident, and easy-to-understand evaluation for an operator.

                    Answer:
                    """,
            'explain_disturbance_significance': f"""
                    You are an expert greenhouse analyst providing a definitive causal explanation.

                    User's Query: "{original_query}"

                    Here is the deep-dive analysis for the requested timestamp:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Value:** Begin by stating the value of the disturbance variable.
                    2.  **Declare its Significance:** Immediately state "Yes, this was a highly significant factor..."
                    3.  **Provide the Definitive Proof:** The most crucial piece of evidence is the **"Optimizer State Interpretation"**. This section tells you exactly what the controller was trying to do. Use this as your primary proof. For example: "The definitive proof is that the controller was applying maximum effort to prevent the internal temperature from violating its upper limit."
                    4.  **Connect the Dots:** Explain the logical link from the KG. For example: "This was necessary because the high outside temperature was causing a major heat load on the greenhouse."
                    5.  **Synthesize:** Combine these points into a clear, concise, and powerful explanation. **Focus only on the constraints that the optimizer interpretation says are active.** Do not mention stable constraints (like CO2 or Humidity if they are not listed as active).

                    Answer:
                    """,
            'explain_strategy': f"""
                    You are an expert greenhouse analyst, summarizing a complex, day-long operational strategy.

                    Original User Query: "{original_query}"

                    Here is a synthesized analysis of the day vs. night strategy:
                    {context}

                    **Instructions:**
                    1.  Synthesize the provided analysis into a single, cohesive paragraph.
                    2.  Start by stating the core finding: the controller uses a distinct two-part strategy for day and night.
                    3.  Describe the daytime strategy: why it allows high humidity and how it's managed.
                    4.  Describe the nighttime strategy: why it forces low humidity and what natural processes help achieve this.
                    5.  Conclude by explaining that this dual-strategy is a sophisticated approach to balance crop growth with crop health.

                    Answer:
                    """,
            'correlation': f"""
                    You are a greenhouse physicist explaining a physical relationship to an operator. Your answer must be clear, concise, and causally correct.

                    User's Query: "{original_query}"

                    Here is the analysis of the relationship:
                    {context}

                    **Instructions for your response:**
                    1.  **State the Statistical Finding:** Begin by directly stating the "Statistical Finding" from the context (e.g., "There is a strong positive relationship...").

                    2.  **Explain the Physical Causality (Most Important Part):**
                        -   Use the "Physical Link" evidence to identify the **cause** and the **effect**.
                        -   If the link is `A -> B`, you MUST state that **"A causes B"**.
                        -   **CRITICAL:** You must frame the explanation according to this known causal direction. For example, "This is because the **Outside Temperature** is the primary driver of heat exchange. As the outside air gets warmer, the temperature difference between inside and outside changes, which directly affects the rate of heat flow through the cover."

                    3.  **Explain the "Why":** Explain *why* the correlation has the direction it does, based on the physical causality. For a positive correlation: "This positive correlation is expected because as the cause (Outside Temperature) increases, the effect (Heat Exchange) also increases."

                    4.  **Synthesize, Don't Invent:** Weave these points into a single, confident paragraph. **Do not invent backward causal links.** Stick strictly to the physical relationship provided.

                    Answer:
                    """,
        }

        # Default to a generic prompt if intent is unknown, though this should be rare.
        default_prompt = f"""
        Please provide a clear and concise answer to the user's query: '{original_query}' using the following context.
        Context: {context}
        Answer:
        """

        # Select the appropriate prompt, or use the default
        prompt_template = prompts.get(intent, default_prompt)
        
        # Fill the template with the provided context
        if intent == 'analyze_model_discrepancy':
            if isinstance(context, dict):
                final_prompt = prompt_template.format(
                    original_query=original_query,
                    finding=context.get('finding', 'Not available.'),
                    explanation=context.get('explanation', 'Not available.'),
                    evidence=json.dumps(context.get('evidence', {}), indent=2)
                )
            else:
                # If not dict, use context as explanation
                final_prompt = prompt_template.format(
                    original_query=original_query,
                    finding='Not available.',
                    explanation=str(context),
                    evidence='{}'
                )
        elif intent == 'explain_anomaly':
            if isinstance(context, dict):
                time_range_analysis = context.get('time_range_analysis', {})
                final_prompt = prompt_template.format(
                    time_range_analysis=time_range_analysis,
                    original_query=original_query
                )
            else:
                # If not dict, use context as time_range_analysis
                final_prompt = prompt_template.format(
                    time_range_analysis=str(context),
                    original_query=original_query
                )
        else:
            # For all other intents, the context is a simple string
            final_prompt = prompt_template.format(context=str(context), original_query=original_query)
        try:
            # Get API key from environment variable for security
            #api_key = os.getenv('OPENAI_API_KEY')
            #if not api_key:
                #return "Error: OPENAI_API_KEY environment variable not set. Please set it before running the chatbot."

            client = OpenAI(api_key='sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')  # Replace with your actual API key


            messages_payload = [
                ChatCompletionSystemMessageParam(
                    role="system",
                    content="You are a knowledgeable and precise greenhouse management assistant. You interpret technical data and translate it into clear, practical insights for a greenhouse operator. Stick strictly to the data and instructions provided."
                ),
                ChatCompletionUserMessageParam(
                    role="user",
                    content=final_prompt
                )
            ]

            response = client.chat.completions.create(
                model="gpt-4o",
                messages=messages_payload
            )
            
            llm_response = response.choices[0].message.content
            print("--- LLM Response ---")
            print(llm_response)
            print("--------------------")
            return llm_response

        except Exception as e:
            print(f"Error communicating with the LLM: {e}")
            return f"Error communicating with the LLM: {e}"

    # Function to determine which analysis type to use
    @staticmethod
    def determine_analysis_type(query: str) -> str:
        """Classify a query as needing "both" analyses or only the "detailed" one.

        The function takes no ``self``; it is declared a static method so it
        can be called both on the class and on an instance (without the
        decorator, an instance call raises ``TypeError`` because the instance
        is passed as an extra positional argument).

        Args:
            query: Free-text user question. Matching is a case-insensitive
                substring test against a fixed keyword list.

        Returns:
            ``"both"`` when the query contains any time/pattern keyword,
            otherwise ``"detailed"``.
        """
        time_keywords = ('drop', 'increase', 'sudden', 'change', 'why', 'when', 'trend', 'pattern')

        # Lower-case once instead of once per keyword comparison.
        lowered = query.lower()

        # Time questions usually benefit from detailed context too, so a time
        # keyword always selects "both", whether or not a detail keyword
        # ('current', 'now', 'level', 'value', 'status', 'what is') is present.
        # Everything else falls back to "detailed".
        if any(keyword in lowered for keyword in time_keywords):
            return "both"
        return "detailed"

    def _get_flux_effect(self, flux_node: str) -> Tuple[str, str]:
        """Look up in the Knowledge Graph how a flux acts on a state variable.

        The effect is read from the ``relationship`` attribute of the edge
        going from ``flux_node`` to a successor whose node ``type`` is
        ``'State'``. Successors are scanned in graph order and the first one
        whose edge carries a recognised sign decides the result.

        Args:
            flux_node: Identifier of the flux node in ``self.G``.

        Returns:
            A tuple ``(effect_type, target_state_name)``, e.g.
            ``('Sink', 'Temperature')``, ``('Source', 'CO2 Concentration')``
            or ``('Bidirectional', 'Humidity')``. The state name is the
            node's ``name`` attribute, falling back to the node id.
            ``('Unknown', 'Unknown')`` is returned when the node is not in
            the graph or no state successor has a recognised relationship.
        """
        unknown = ('Unknown', 'Unknown')
        if not self.G.has_node(flux_node):
            return unknown

        # Edge sign -> physical role of the flux with respect to the state.
        effect_by_sign = {'+': 'Source', '-': 'Sink', '+/-': 'Bidirectional'}

        # A flux's effect is defined by its relationship to the state variable it influences.
        for successor in self.G.successors(flux_node):
            node_attrs = self.G.nodes[successor]
            if node_attrs.get('type') != 'State':
                continue

            edge_data = self.G.get_edge_data(flux_node, successor, default={})
            effect = effect_by_sign.get(edge_data.get('relationship'))
            if effect is not None:
                return effect, node_attrs.get('name', successor)

        return unknown

    def analyze_physical_obstacle(self, state_node: str, start_dt: datetime, end_dt: datetime) -> Dict[str, Any]:
        """Explain why a state variable changed slowly within a time window.

        (GENERALIZED FRAMEWORK V2.0) The window ``start_dt``..``end_dt`` of
        ``self.data`` is checked against a fixed list of expert scenarios, one
        per state code ('H', 'T', 'C'). The method is modular: further
        scenarios can be appended before the final fallback.

        A scenario is skipped (instead of raising ``KeyError``) when one of
        the data columns it needs is not mapped in ``self.node_to_column`` or
        is absent from the data.

        Args:
            state_node: State code to analyse; only 'H', 'T' and 'C' have
                scenarios.
            start_dt: Start of the window (label-based slice, inclusive).
            end_dt: End of the window (label-based slice, inclusive).

        Returns:
            On a match, a dict with ``'obstacle_name'``, ``'explanation'`` and
            ``'evidence'``. Otherwise a dict with a single ``'error'`` key
            (empty window, slicing failure, or no matching scenario).
        """
        print(f"DEBUG - Analyzing physical obstacle for '{state_node}' from {start_dt} to {end_dt}.")

        try:
            window_data = self.data.loc[start_dt:end_dt]
            if window_data.empty:
                return {'error': "No data for the specified time range."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        def column_for(node: str) -> Optional[str]:
            """Return the window column mapped to *node*, or None when unavailable."""
            column = self.node_to_column.get(node)
            if column is not None and column in window_data.columns:
                return column
            return None

        # ==============================================================================
        # --- SCENARIO 1: Latent Heat & Mass Inertia (The "Wet Sponge" Effect) ---
        # This applies when a change in temperature causes a conflict with humidity goals.
        # ==============================================================================
        # Only windows starting at 17:00 or later are treated as a nighttime cooldown.
        if state_node == 'H' and start_dt.hour >= 17:
            t_col = column_for('T')
            h_cov_col = column_for('H_cov')  # Condensation on cover

            if t_col is not None and h_cov_col is not None:
                temp_start = window_data[t_col].iloc[0]
                temp_end = window_data[t_col].iloc[-1]

                if temp_end < temp_start:  # If temperature is dropping...
                    avg_h_cov_flux = window_data[h_cov_col].mean()
                    if avg_h_cov_flux > 0.001:  # ...and condensation is the primary removal method...
                        return {
                            'obstacle_name': "Latent Mass Inertia (The 'Wet Sponge' Effect)",
                            'explanation': (
                                "The primary obstacle was the massive amount of water vapor stored in the warm daytime air. "
                                "As the air cooled, its capacity to hold moisture decreased, working directly against the goal of lowering relative humidity. "
                                "The system had to rely on the slow, passive process of condensation on the cover to drain this enormous 'buffer' of water vapor."
                            ),
                            'evidence': {
                                "Conflicting Physics": f"The air temperature was actively dropping (from {temp_start:.1f}°C to {temp_end:.1f}°C), which physically increases relative humidity if water vapor content is constant.",
                                "Dominant Process": f"The system was relying on passive condensation on the cover to remove moisture (average flux of {avg_h_cov_flux:.5f}), which is an inherently slow process."
                            }
                        }

        # ==============================================================================
        # --- SCENARIO 2: Thermal Inertia (The "Brick Oven" Effect) ---
        # This applies when temperature changes slowly due to stored heat in the greenhouse mass.
        # ==============================================================================
        if state_node == 'T':
            q_sun_col = column_for('Q_sun')
            q_cool_col = column_for('Q_cool')
            t_col = column_for('T')

            if q_sun_col is not None and q_cool_col is not None and t_col is not None:
                avg_sun_flux = window_data[q_sun_col].mean()
                avg_cool_flux = window_data[q_cool_col].abs().mean()

                # Check for a situation where cooling is active but temperature is still high/rising slowly
                temp_change = window_data[t_col].iloc[-1] - window_data[t_col].iloc[0]

                if avg_cool_flux > abs(avg_sun_flux) and temp_change >= 0:
                    return {
                        'obstacle_name': "Thermal Inertia (The 'Brick Oven' Effect)",
                        'explanation': (
                            "The primary obstacle was the large amount of thermal energy stored in the physical mass of the greenhouse itself (the ground, benches, water, and plants). "
                            "Even after the main heat source (the sun) was gone and active cooling was running, this stored heat continued to radiate back into the air, acting as a persistent internal heat source that slowed down the cooling process."
                        ),
                        'evidence': {
                            "Stored Heat Release": f"Despite the active cooling system removing heat at a rate of {avg_cool_flux:.2f} W/m², the temperature failed to drop, indicating a significant internal heat source was counteracting it.",
                            "System Inertia": "This demonstrates the thermal mass of the greenhouse, which acts like a 'heat battery' that must be slowly discharged before the air temperature can fall quickly."
                        }
                    }

        # ==============================================================================
        # --- SCENARIO 3: Chemical/Biological Inertia (Slow Plant Response) ---
        # This applies when CO2 levels don't drop instantly after injection stops.
        # ==============================================================================
        if state_node == 'C':
            # Check for a post-injection, pre-ventilation period where CO2 lingers
            c_inj_col = column_for('uC')  # CO2 injection control
            c_vent_col = column_for('C_vent')
            c_col = column_for('C')

            if c_inj_col is not None and c_vent_col is not None and c_col is not None:
                avg_injection = window_data[c_inj_col].mean()
                avg_vent_flux = window_data[c_vent_col].abs().mean()

                co2_change = window_data[c_col].iloc[-1] - window_data[c_col].iloc[0]

                # Injection off, vents closed, but CO2 not dropping fast
                if avg_injection < 0.01 and avg_vent_flux < 0.01 and co2_change > -5:
                    return {
                        'obstacle_name': "Gas Diffusion & Biological Inertia",
                        'explanation': (
                            "The primary obstacle was the slow rate of natural CO₂ consumption by the plants (photosynthesis) and the lack of an active removal mechanism. "
                            "With CO₂ injection turned off and the vents closed, the only way for the high concentration to decrease was through the plants' biological uptake, which is a relatively slow process compared to mechanical ventilation."
                        ),
                        'evidence': {
                            "No Active Removal": f"Both the CO₂ injection system (avg: {avg_injection:.3f}) and ventilation flux (avg: {avg_vent_flux:.3f}) were inactive during this period.",
                            "Slow Biological Uptake": "The slow decay of CO₂ concentration demonstrates the rate limit of photosynthesis as the sole removal pathway."
                        }
                    }

        # If no specific scenario is matched, return a generic error/message
        return {
            'error': "Could not identify a specific physical obstacle for this scenario. The slow change may be due to the system's normal inertia or a lack of strong control action."
        }

    def analyze_daily_total_and_comparison(self, variable_nodes: List[str], date: datetime.date) -> Dict[str, Any]:
        """Integrate flux variables over one day and, for exactly two, compare them.

        (GENERALIZED VERSION 2.1) For every node the physical effect (source,
        sink, bidirectional) is obtained from ``self._get_flux_effect`` and the
        column sum is turned into a daily total using a fixed 300-second
        sample step and a J -> MJ scale factor.

        Args:
            variable_nodes: Flux node codes to analyse.
            date: Calendar day to select; rows are kept when
                ``self.data.index.date == date``.

        Returns:
            A dict with ``'date'`` (``YYYY-MM-DD``), ``'variables'`` (per-node
            result or per-node ``{'error': ...}``) and ``'comparison'`` (a
            sentence, or ``None`` when fewer/more than two variables are valid
            or the smaller same-type total is <= 1e-6). A dict with only
            ``'error'`` is returned when the day has no data or slicing fails.

        Notes:
            Sink/Source entries carry ``total_MJ_per_m2_raw`` and
            ``total_MJ_per_m2_display``; Bidirectional entries carry
            ``total_positive_MJ`` and ``total_negative_MJ``. For ranking and
            for the summary sentence a bidirectional flux is represented by
            the sum of both directions, so it is no longer reported as 0.00.
            NOTE(review): totals are labelled MJ/m² for every flux; confirm
            the unit is appropriate for non-energy fluxes.
        """
        print(f"DEBUG - Analyzing general daily totals for {variable_nodes} on {date}.")
        analysis_result = {
            'date': date.strftime('%Y-%m-%d'),
            'variables': {},
            'comparison': None
        }

        try:
            day_data = self.data[self.data.index.date == date]
            if day_data.empty:
                return {'error': f"No data available for the specified date: {date}."}
        except Exception as e:
            return {'error': f"Error slicing data: {e}"}

        time_step_seconds = 300.0
        joules_to_megajoules = 1_000_000.0

        def to_megajoules(flux_sum):
            """Convert a summed flux (per sample) into a daily total in MJ."""
            return flux_sum * time_step_seconds / joules_to_megajoules

        def format_total(value):
            """Use four decimals for small non-zero totals so they do not display as 0.00."""
            if 0 < value < 0.01:
                return f"{value:.4f}"
            return f"{value:.2f}"

        def magnitude(result):
            """Single comparable size of a result, whatever its effect type."""
            if result['effect_type'] == 'Bidirectional':
                return result.get('total_positive_MJ', 0) + result.get('total_negative_MJ', 0)
            return result.get('total_MJ_per_m2_raw', 0)

        descriptions = {
            'Sink': "Total Removal/Consumption",
            'Source': "Total Addition/Contribution",
        }

        for node in variable_nodes:
            column = self.node_to_column.get(node)
            if not column or column not in day_data.columns:
                analysis_result['variables'][node] = {'error': f"Data column for '{node}' not found."}
                continue

            effect_type, target_state = self._get_flux_effect(node)

            variable_result = {
                'name': self.node_to_name.get(node, node),
                'effect_type': effect_type,
                'target_state': target_state
            }
            series = day_data[column]

            if effect_type in descriptions:
                # A sink is totalled by magnitude; a source keeps its sign.
                flux_sum = series.abs().sum() if effect_type == 'Sink' else series.sum()
                raw_total_megajoules = to_megajoules(flux_sum)
                variable_result['total_MJ_per_m2_raw'] = raw_total_megajoules
                variable_result['total_MJ_per_m2_display'] = format_total(raw_total_megajoules)
                variable_result['description'] = descriptions[effect_type]

            elif effect_type == 'Bidirectional':
                variable_result['total_positive_MJ'] = to_megajoules(series[series > 0].sum())
                variable_result['total_negative_MJ'] = to_megajoules(series[series < 0].abs().sum())
                variable_result['description'] = "Total Influx and Outflux"

            else:
                variable_result['error'] = "Could not determine the physical effect from the Knowledge Graph."

            analysis_result['variables'][node] = variable_result

        # Perform the comparison based on the new, richer data
        valid_results = [v for v in analysis_result['variables'].values() if 'error' not in v]
        if len(valid_results) == 2:
            # Sort using the raw numerical value (not the display string) for accuracy.
            res1, res2 = sorted(valid_results, key=magnitude, reverse=True)
            same_effect = res1['effect_type'] == res2['effect_type']

            if same_effect and res1['effect_type'] in ('Sink', 'Source'):
                val1 = res1.get('total_MJ_per_m2_raw', 0)
                val2 = res2.get('total_MJ_per_m2_raw', 0)

                # Skip the ratio when the smaller total is effectively zero.
                if val2 > 1e-6:
                    ratio = val1 / val2
                    if ratio > 1000:
                        comparison_text = f"The contribution from '{res1['name']}' was overwhelmingly larger (orders of magnitude greater) than that of '{res2['name']}'."
                    elif ratio > 100:
                        comparison_text = f"The contribution from '{res1['name']}' was substantially greater (over 100 times more) than that of '{res2['name']}'."
                    else:
                        comparison_text = f"The contribution from '{res1['name']}' was approximately {ratio:.0f} times greater than that of '{res2['name']}'."
                    analysis_result['comparison'] = comparison_text

            elif same_effect:
                # Both fluxes are bidirectional: report each direction rather than
                # claiming the effects differ.
                part1 = f"'{res1['name']}' had a total influx of {res1['total_positive_MJ']:.2f} MJ/m² and a total outflux of {res1['total_negative_MJ']:.2f} MJ/m²"
                part2 = f"'{res2['name']}' had a total influx of {res2['total_positive_MJ']:.2f} MJ/m² and a total outflux of {res2['total_negative_MJ']:.2f} MJ/m²"
                analysis_result['comparison'] = f"Both factors were bidirectional: {part1}, while {part2}."

            else:
                # Use the raw numerical value for the summary too.
                summary1 = f"'{res1['name']}' acted as a {res1['effect_type'].lower()} with a total magnitude of {magnitude(res1):.2f} MJ/m²"
                summary2 = f"'{res2['name']}' acted as a {res2['effect_type'].lower()} with a total magnitude of {magnitude(res2):.2f} MJ/m²"
                analysis_result['comparison'] = f"The two factors had different effects: {summary1}, while {summary2}."

        return analysis_result

    def parse_timestamp_query(self, query: str) -> dict[str, Any]:
        """Parse a query for a specific variable, date, and time.

        The query is lower-cased, then:

        1. The first key of ``self.name_to_code`` found as a substring selects
           the variable; without one the method returns immediately.
        2. ``Month DD, YYYY at|around HH:MM [am|pm]`` is tried.
        3. ``YYYY-MM-DD`` / ``DD/MM/YYYY``-style dates followed by
           ``at|around HH:MM [am|pm]`` are tried. The am/pm suffix is honoured
           here as well (it used to be ignored, so "3:30 pm" became 03:30).
        4. As a last resort ``dateutil.parser.parse(..., fuzzy=True)`` is run
           on the query with the variable name removed.

        Args:
            query: Free-text user question.

        Returns:
            A dict with keys ``'variable_code'``, ``'date'``, ``'time'`` and
            ``'query_term'``. Entries that could not be determined stay ``None``.
        """
        query = query.lower()
        query_info = {'variable_code': None, 'date': None, 'time': None, 'query_term': None}

        # Extract variable: use the first known name found, for simplicity.
        for var_name, var_code in self.name_to_code.items():
            if var_name in query:
                query_info['variable_code'] = var_code
                query_info['query_term'] = var_name
                break
        else:
            return query_info  # Cannot proceed without variable

        # Format: Month DD, YYYY at HH:MM (am/pm optional)
        match = re.search(r'(\w+)\s+(\d{1,2})\s*[,]?\s*(\d{4})\s+(?:at|around)\s+(\d{1,2}):(\d{2})\s*(am|pm)?', query)
        if match:
            month_str, day_str, year_str, hour_str, min_str, ampm = match.groups()
            stamp = f"{month_str} {day_str} {year_str} {hour_str}:{min_str}"

            # Try the 12-hour reading first when a suffix is present, then fall
            # back to a plain 24-hour reading (covers inputs such as "15:30pm").
            attempts = []
            if ampm:
                attempts.append((f"{stamp}{ampm}", "%B %d %Y %I:%M%p"))
            attempts.append((stamp, "%B %d %Y %H:%M"))

            for text, fmt in attempts:
                try:
                    dt_obj = datetime.strptime(text, fmt)
                except ValueError:
                    continue  # Failed this format
                query_info['date'] = dt_obj.date()
                query_info['time'] = dt_obj.time()
                return query_info

        # Format: YYYY-MM-DD at HH:MM or DD/MM/YYYY at HH:MM etc. (am/pm optional)
        match = re.search(
            r'(\d{4}[-/]\d{1,2}[-/]\d{1,2}|\d{1,2}[-/]\d{1,2}[-/]\d{4})'
            r'\s+(?:at|around)\s+(\d{1,2}):(\d{2})(?:\s*(am|pm)\b)?',
            query,
        )
        if match:
            date_str, hour_str, min_str, ampm = match.groups()
            try:
                # Try common formats
                date_obj = pd.to_datetime(date_str, errors='coerce').date()
                if date_obj:
                    hour = int(hour_str)
                    # Convert a 12-hour reading to 24-hour; hours already
                    # above 12 are left untouched.
                    if ampm == 'pm' and hour < 12:
                        hour += 12
                    elif ampm == 'am' and hour == 12:
                        hour = 0
                    query_info['date'] = date_obj
                    query_info['time'] = time(hour, int(min_str))
                    return query_info
            except ValueError:
                pass  # Date or time parse failed

        # Fallback: Use dateutil parser if installed and other methods fail
        try:
            from dateutil import parser
            # Remove the variable part to help the parser
            query_parts = query.split(query_info['query_term'])
            search_str = " ".join(query_parts).strip()
            dt_obj = parser.parse(search_str, fuzzy=True)  # Fuzzy might pick up wrong things
            query_info['date'] = dt_obj.date()
            query_info['time'] = dt_obj.time()
            print(f"Used dateutil parser: {query_info['date']} {query_info['time']}")
            return query_info
        except ImportError:
            print("Warning: dateutil library not found for fallback parsing.")
        except (ValueError, OverflowError):
            # dateutil raises OverflowError for out-of-range numbers in the text.
            print(f"Warning: Could not parse date/time from query: '{query}'")

        return query_info  # Return potentially incomplete info
    
    def _interpret_lagrangian_state(self, state_node: str, lg_value: float) -> str:
        """Turn a Lagrangian multiplier value into a plain-English sentence.

        Args:
            state_node: State code; used to find the matching ``'ieq'``
                Lagrangian node in ``self.lagrangian_nodes`` and the display
                name in ``self.node_to_name``.
            lg_value: Multiplier value; NaN/NA is reported as unknown.

        Returns:
            A sentence saying the optimizer state is unknown, stable, or
            guarding the upper (positive value) or lower (otherwise) limit.
            Values more than 100x the activity threshold are prefixed as
            highly significant.
        """
        if pd.isna(lg_value):
            return "Optimizer state for this variable is unknown."

        # Per-Lagrangian activity threshold from the central config (default 1e-7).
        ieq_node = self.lagrangian_nodes.get(state_node, {}).get('ieq')
        active_threshold = self.config['lagrangian_active_threshold'].get(ieq_node, 1e-7)

        magnitude = abs(lg_value)
        if magnitude < active_threshold:
            return "Stable; the controller is not actively fighting any constraints for this variable."

        var_name = self.node_to_name.get(state_node, state_node)

        # Extreme values (beyond 100x the base threshold) are flagged explicitly.
        significance = ""
        if magnitude > (active_threshold * 100):
            significance = "This is highly significant. "

        # A positive multiplier points at the upper bound, anything else at the lower one.
        if not lg_value > 0:
            return f"{significance}The controller's primary goal is to prevent **{var_name}** from falling below its **LOWER limit**."
        return f"{significance}The controller's primary goal is to prevent **{var_name}** from exceeding its **UPPER limit**."
        
    def analyze_system_fault(self, primary_node: str, timestamp: datetime) -> Dict[str, Any]:
        """Diagnose a system fault at ``timestamp`` from known fault signatures.

        (DEFINITIVE V2) The row nearest to ``timestamp`` and the row before it
        are compared against every signature. Each signature scores one point
        per matched piece of evidence and is kept only with a score above 1.
        The highest-scoring candidate is returned; on a tie the signature
        checked first wins.

        Args:
            primary_node: Not used by this method.
            timestamp: Moment of the suspected fault; resolved through
                ``self.find_nearest_timestamp``.

        Returns:
            A dict with ``'fault_type'``, ``'explanation'`` and ``'evidence'``
            (plus ``'confidence'`` when a signature matched), or a dict with
            only ``'error'`` when the rows could not be retrieved or the event
            is the first data point.
        """
        print(f"DEBUG (Fault Diagnosis V2): Diagnosing potential system faults at {timestamp}.")

        try:
            # Locate the event row and its predecessor.
            nearest_ts = self.find_nearest_timestamp(timestamp)
            position = self.data.index.get_loc(nearest_ts)
            if position == 0:
                return {'error': "Fault detected at first data point."}

            row_now = self.data.iloc[position]
            row_prev = self.data.iloc[position - 1]
        except Exception as e:
            return {'error': f"Could not retrieve data for fault analysis: {e}"}

        def humidity_sensor_signature():
            """SIGNATURE 1: Humidity Sensor Failure. Returns a candidate dict or None."""
            try:
                hum_col = self.node_to_column.get('H')
                # In this system, heating Lagrangian reacts to humidity faults
                hum_lg_col = self.node_to_column.get('uQh_ieq')
                co2_lg_col = self.node_to_column.get('uC_ieq')

                needed = (hum_col, hum_lg_col, co2_lg_col)
                if not all(col in row_now for col in needed):
                    return None

                hum_change = row_now[hum_col] - row_prev[hum_col]
                hum_lg_val = row_now[hum_lg_col]
                co2_lg_val = row_now[co2_lg_col]

                evidence_flags = [
                    hum_change < -50.0,   # Physically impossible drop
                    hum_lg_val < -10.0,   # Extreme saturation
                    co2_lg_val < -10.0,   # Extreme saturation
                ]
                # Score is the number of matched evidence points
                score = sum(evidence_flags)

                # Require at least two strong pieces of evidence
                if score > 1:
                    return {
                        'confidence': score,
                        'fault_type': "Humidity Sensor Failure",
                        'explanation': "The event was triggered by a catastrophic failure of the humidity sensor, which reported a physically impossible value.",
                        'evidence': {
                            "Root Cause (Sensor Data)": f"The humidity sensor reading showed an impossible drop of {hum_change:.1f}% in a single 5-minute step.",
                            "Optimizer Reaction (System Shock)": f"The controller's optimizer was shocked by this false data, causing it to saturate the heating and CO2 control outputs, proven by the extreme negative spikes in their Lagrangians (uHieq_lg: {hum_lg_val:.2E}, uCieq_lg: {co2_lg_val:.2E}).",
                            "System Action (Defensive Purge)": "The system's safety protocol likely triggered a full air purge using ventilation to restore the greenhouse to a known, predictable state."
                        }
                    }
            except Exception as e:
                print(f"DEBUG - Error during humidity fault check: {e}")
            return None

        def heating_system_signature():
            """SIGNATURE 2: Heating System Saturation/Fault. Returns a candidate dict or None."""
            try:
                heat_lg_col = self.node_to_column.get('uQh_ieq')
                heat_flux_col = self.node_to_column.get('Q_heat')
                temp_col = self.node_to_column.get('T')

                needed = (heat_lg_col, heat_flux_col, temp_col)
                if not all(col in row_now for col in needed):
                    return None

                heat_lg_val = row_now[heat_lg_col]
                heat_flux_val = row_now[heat_flux_col]
                temp_change = row_now[temp_col] - row_prev[temp_col]

                evidence_flags = [
                    heat_lg_val < -10.0,  # Extreme saturation
                    heat_flux_val > 100,  # Abnormal physical heat flux
                    temp_change > 0.3,    # Implausible temperature jump
                ]
                score = sum(evidence_flags)

                if score > 1:
                    return {
                        'confidence': score,
                        'fault_type': "Heating Control System Fault",
                        'explanation': "The event was caused by an unplanned saturation or fault in the heating control system, leading to an uncontrolled release of heat.",
                        'evidence': {
                            "Root Cause (Optimizer Signal)": f"The heating control Lagrangian (`uHieq_lg`) plunged to an extreme value of {heat_lg_val:.2E}, indicating the actuator was forced to its maximum limit.",
                            "Physical Consequence": f"This fault resulted in a massive, instantaneous injection of physical heat, with the heat flux (`phi_QT_heat`) spiking to {heat_flux_val:.2f} W/m².",
                            "Impact on Climate": f"As a direct result, the internal temperature (`Temp_ref`) surged by {temp_change:.2f}°C in just five minutes."
                        }
                    }
            except Exception as e:
                print(f"DEBUG - Error during heating fault check: {e}")
            return None

        # Run the signatures in their fixed order and keep the ones that fired.
        potential_faults = []
        for signature in (humidity_sensor_signature, heating_system_signature):
            candidate = signature()
            if candidate is not None:
                potential_faults.append(candidate)

        # --- FINAL DIAGNOSIS ---
        if potential_faults:
            # Select the fault with the highest confidence score
            return max(potential_faults, key=lambda fault: fault['confidence'])

        return {
            'fault_type': "No Specific Fault Detected",
            'explanation': f"While the event at {timestamp.strftime('%H:%M')} may have been unusual, it does not match any known fault signatures in my diagnostic system.",
            'evidence': {}
        }

    def _extract_relationship_entities(self, query: str) -> List[str]:
        """Extract the two entities being compared in a relationship query.

        (REFINED) The lower-cased query is matched against a fixed list of
        relationship phrasings; the text on each side of the phrase is then
        passed separately to ``self.extract_kg_entities`` so the two searches
        cannot interfere with each other.

        Args:
            query: Free-text user question.

        Returns:
            ``[node_left, node_right]`` where ``node_left`` is the entity with
            the greatest start position in the left-hand text and
            ``node_right`` the one with the smallest start position in the
            right-hand text; ``[]`` when no phrasing matches, either side is
            empty, or either side yields no entity.
        """
        lowered = query.lower()

        # Common relationship phrasings; `(.*?)` is a non-greedy capture of
        # the left-hand entity text, `(.*)` takes the remainder.
        phrasings = (
            r'(.*?) is related to (.*)',
            r'relationship between (.*?) and (.*)',
            r'connection between (.*?) and (.*)',
            r'correlation between (.*?) and (.*)',
        )

        # First phrasing that matches wins; later ones are not evaluated.
        hit = next(
            (found for found in (re.search(p, lowered) for p in phrasings) if found),
            None,
        )
        if hit is None:
            return []  # Could not find a clear relationship structure

        left_text, right_text = hit.groups()
        if not (left_text and right_text):
            return []  # One side of the phrase is empty

        # Entities come back as (node_id, start_position) pairs.
        left_hits = self.extract_kg_entities(left_text)
        right_hits = self.extract_kg_entities(right_text)
        if not left_hits or not right_hits:
            return []

        def start_position(entity):
            return entity[1]

        # LAST entity of the left part, FIRST entity of the right part.
        last_left = max(left_hits, key=start_position)
        first_right = min(right_hits, key=start_position)
        return [last_left[0], first_right[0]]
    
    def analyze_volatility(self, target_node: Union[str, List[str]], start_dt: datetime, end_dt: datetime) -> Dict[str, Any]:
        """
        (DEFINITIVE V4 - Corrected Variable Re-assignment) 
        Analyzes the cause of volatility by first identifying the TRULY most volatile actuator,
        then analyzing its performance against its driver and its resulting state.
        """
        print(f"DEBUG (Volatility Expert V4): Analyzing instability for '{target_node}' from {start_dt} to {end_dt}.")
        
        try:
            window_data = self.data.loc[start_dt:end_dt]
            if window_data.empty:
                return {'error': 'No data in the specified time range for analysis.'}
        except Exception as e:
            return {'error': f"Could not retrieve data for analysis: {e}"}
        
        # --- 1. FIND THE TRUE SOURCE OF VOLATILITY ---
        # This block correctly identifies the most volatile actuator, regardless of what the user mentioned.
        actuator_nodes = ['uV', 'uQc', 'uC', 'uQh']
        volatility_scores = {}

        # Inside analyze_volatility

        def calculate_volatility_score(series):
            if series.empty or len(series.dropna()) < 2:
                return 0
            
            mean = series.mean()
            std = series.std()

            # Handle cases where std is NaN or zero
            if pd.isna(std) or std == 0:
                return 0
                
            # Metric 1: Coefficient of Variation (good for instability)
            cv = std / abs(mean) if abs(mean) > 1e-9 else 0
            
            # Metric 2: Sum of absolute changes (good for "total work" or "activity")
            total_change = series.diff().abs().sum()
            
            # Normalize total_change by the series range to make it comparable
            series_range = series.max() - series.min()
            normalized_total_change = total_change / series_range if series_range > 1e-9 else 0
            
            # Combine the metrics. Give more weight to the "activity" metric.
            # This is an empirical tuning step.
            combined_score = (0.3 * cv) + (0.7 * normalized_total_change)
            return combined_score

        for node in actuator_nodes:
            col = self.node_to_column.get(node)
            if col and col in window_data.columns:
                # Use the new, more robust scoring function
                volatility_scores[node] = calculate_volatility_score(window_data[col])

        # Find the actuator with the highest volatility score.
        if volatility_scores:
            most_volatile_actuator = max(volatility_scores, key=lambda k: volatility_scores[k])
            print(f"DEBUG (Volatility Expert): User mentioned '{target_node}', but the most volatile actuator is actually '{most_volatile_actuator}' (CV: {volatility_scores[most_volatile_actuator]:.2f}). Switching analysis target.")
            # This is the new primary subject of our analysis.
            effort_signal_node = most_volatile_actuator
        else:
            # Fallback to the user-mentioned node if no actuators found
            effort_signal_node = target_node[0] if isinstance(target_node, list) else target_node

        effort_signal_node = None
    
        # Check if the user's target is a specific, known control action
        user_target = target_node[0] if isinstance(target_node, list) else target_node
        if user_target in self.control_action_nodes:
            # If the user specified a clear target, RESPECT that choice.
            print(f"DEBUG (Volatility Expert V5): User specified a clear target '{user_target}'. Analyzing it directly.")
            effort_signal_node = user_target
        else:
            # If the user's query was vague (e.g., "why was the system unstable?"),
            # then we perform our smart search for the most volatile actuator.
            print("DEBUG (Volatility Expert V5): User query was vague. Searching for the most volatile actuator...")
            actuator_nodes = ['uV', 'uQc', 'uC', 'uQh']
            volatility_scores = {}
            for node in actuator_nodes:
                col = self.node_to_column.get(node)
                if col and col in window_data.columns:
                    volatility_scores[node] = calculate_volatility_score(window_data[col])

            if volatility_scores:
                most_volatile_actuator = max(volatility_scores, key=lambda k: volatility_scores[k])
                print(f"DEBUG (Volatility Expert V5): Found most volatile actuator: '{most_volatile_actuator}'.")
                effort_signal_node = most_volatile_actuator
            else:
                return {'error': "Could not find any control actuators to analyze for volatility."}
        
        # <<< --- CRITICAL FIX STARTS HERE --- >>>
        # --- 2. RE-INITIALIZE ANALYSIS VARIABLES BASED ON THE *TRUE* VOLATILE ACTUATOR ---
        # After finding the most volatile actuator, we must re-define all our analysis
        # variables (base_state, result_node, driver_node, and their columns) based on THIS new node.
        
        base_state_code = effort_signal_node.split('_')[0] # 'uQc' -> 'uQ', then we'll map to 'T'
        # Map control base codes to the state they primarily affect
        control_to_state_map = {'uV': 'H', 'uQc': 'T', 'uC': 'C', 'uQh': 'T'}
        state_affected = control_to_state_map.get(effort_signal_node, base_state_code)
        
        result_signal_node = f"{state_affected}_ref"

        driver_map = {'C': 'Qrad', 'T': 'Qrad', 'H': 'H_trans'}
        external_driver_node = driver_map.get(state_affected, 'Qrad')
        
        effort_col = self.node_to_column.get(effort_signal_node)
        result_col = self.node_to_column.get(result_signal_node)
        driver_col = self.node_to_column.get(external_driver_node)

        print(f"DEBUG (Volatility Expert): Re-initialized analysis vars. Effort: {effort_signal_node}, Result: {result_signal_node}, Driver: {external_driver_node}")

        if not all(col and col in window_data.columns for col in [effort_col, result_col, driver_col]):
            return {'error': "Missing one or more necessary data columns for the re-targeted volatility analysis."}

        # --- 3. CONSISTENT VOLATILITY MEASUREMENT (using the CORRECTED variables) ---
        effort_volatility_cv = calculate_volatility_score(window_data[effort_col])
        result_volatility_cv = calculate_volatility_score(window_data[result_col])
        driver_volatility_cv = calculate_volatility_score(window_data[driver_col])
        # <<< --- CRITICAL FIX ENDS HERE --- >>>


        # --- 4. CROSS-CORRELATION FOR LAG ANALYSIS ---
        measurable_lag_minutes = None
        correlation_strength = None
        s1 = window_data[driver_col]
        s2 = window_data[effort_col]
        lags = range(0, 7)
        corrs = [s1.corr(s2.shift(lag)) for lag in lags]
        if any(pd.notna(c) for c in corrs):
            max_corr_idx = np.nanargmax(np.abs(corrs))
            measurable_lag_steps = lags[max_corr_idx]
            measurable_lag_minutes = measurable_lag_steps * 5
            correlation_strength = corrs[max_corr_idx]

        # --- 5. SYNTHESIZE THE CONCLUSION ---
        is_excellent_performance = (driver_volatility_cv > 0.5 and 
                                    effort_volatility_cv > 1.0 and 
                                    result_volatility_cv < 0.05)
                                    
        if is_excellent_performance:
            conclusion = "Yes, this is normal and indicates masterful performance under extremely challenging conditions."
        else:
            conclusion = "No, this is not normal. The oscillation indicates a potential tuning issue, as the controller was unable to fully stabilize the system in response to external disturbances."

        # --- 6. ASSEMBLE EVIDENCE ---
        effort_name = self.node_to_name.get(effort_signal_node, effort_signal_node)
        result_name = self.node_to_name.get(result_signal_node, result_signal_node)
        driver_name = self.node_to_name.get(external_driver_node, external_driver_node)

        evidence = {
            "External Driver of Instability": f"The root cause was the highly volatile **{driver_name}** (Volatility CV: {driver_volatility_cv:.2f}).",
            "Controller's High-Performance Response": f"The jagged signal you observed was in the **{effort_name}** system. This is not a flaw; it shows the controller making rapid, high-frequency adjustments to precisely counteract the external chaos (Volatility CV: {effort_volatility_cv:.2f}).",
            "Performance Verdict": f"The performance was excellent. Despite the volatile driver and the controller's intense effort, the final state of **{result_name}** remained remarkably stable (Volatility CV: {result_volatility_cv:.2f})."
        }
        
        if measurable_lag_minutes is not None and correlation_strength is not None:
            evidence["Measurable System Lag"] = (
                f"The system's response is extremely fast. Cross-correlation between the **{driver_name}** and the **{effort_name}** "
                f"shows a peak correlation of {correlation_strength:.2f} with a time lag of approximately **{measurable_lag_minutes} minutes**. "
                "This near-instantaneous reaction is a hallmark of a high-performance, predictive controller."
            )

        return {
            'conclusion': conclusion,
            'evidence': evidence
        }
    
    def format_volatility_for_llm(self, analysis_result: Dict[str, Any]) -> str:
        """
        Render a volatility analysis result as a text block for the LLM prompt.

        Args:
            analysis_result: Dictionary that holds either an 'error' entry, or a
                'conclusion' string and an 'evidence' mapping of label -> text.

        Returns:
            A one-line error message when 'error' is present. Otherwise a
            newline-joined string made of a title, the conclusion, and (when
            evidence is non-empty) one bullet per recognised evidence label.
            Evidence labels outside the fixed narrative order are not emitted.
        """
        # A failed analysis short-circuits to a plain error line.
        if 'error' in analysis_result:
            return f"Error during volatility analysis: {analysis_result['error']}"

        conclusion_text = analysis_result.get('conclusion', 'Analysis inconclusive.')
        lines = [
            "--- Volatility & Performance Analysis ---",
            f"\n**Conclusion:** {conclusion_text}",
        ]

        evidence_items = analysis_result.get('evidence', {})
        if not evidence_items:
            # Nothing to narrate beyond the conclusion itself.
            return "\n".join(lines)

        # Fixed ordering so the story reads driver -> response -> lag -> verdict,
        # independent of the insertion order of the evidence mapping.
        narrative_order = (
            "External Driver of Instability",
            "Controller's High-Performance Response",
            "Measurable System Lag",
            "Performance Verdict",
        )

        lines.append("\n**Causal Chain of Evidence:**")
        lines.extend(
            f"- **{label}:** {evidence_items[label]}"
            for label in narrative_order
            if label in evidence_items
        )
        return "\n".join(lines)
    
    def calculate_time_window(self, central_time: time, window_minutes: int, date: date):
        """
        Build a symmetric window of ``window_minutes`` on each side of ``central_time``.

        Returns a ``(start, end)`` tuple of 'HH:MM' strings. Only the clock time
        is formatted, so a window that crosses midnight wraps (e.g. '23:40').
        """
        from datetime import datetime
        half_span = timedelta(minutes=window_minutes)
        anchor = datetime.combine(date, central_time)
        bounds = (anchor - half_span, anchor + half_span)
        return tuple(moment.strftime('%H:%M') for moment in bounds)

    def get_5min_interval(self, target_time: time, date: date):
        """
        Return the 5-minute bucket that contains ``target_time`` on ``date``.

        The result is a ``(start, end)`` tuple of 'HH:MM' strings, where start is
        the target time floored to a multiple of five minutes and end is five
        minutes later (so the last bucket of a day ends at '00:00').
        """
        from datetime import datetime
        moment = datetime.combine(date, target_time)
        # hour * 60 is already a multiple of 5, so flooring the minute component
        # alone is equivalent to flooring minutes-since-midnight.
        floored_minute = moment.minute - moment.minute % 5
        window_open = datetime.combine(date, time(moment.hour, floored_minute))
        window_close = window_open + timedelta(minutes=5)
        return window_open.strftime('%H:%M'), window_close.strftime('%H:%M')

    def _find_significant_event_in_range(self, control_action_node: str, start_dt: datetime, end_dt: datetime) -> Optional[datetime]:
        """
        Locate the timestamp of the largest step-to-step change of a control action.

        The control action's column is sliced out of ``self.data`` with a label
        slice from ``start_dt`` to ``end_dt``, and the absolute difference between
        consecutive samples is scanned for its maximum.

        Args:
            control_action_node: Node code looked up in ``self.node_to_column``.
            start_dt: Start of the range; also used as the fallback return value.
            end_dt: End of the range.

        Returns:
            The index label of the first occurrence of the largest absolute
            change, or ``start_dt`` when any of the following holds:
            the node has no known column in ``self.data``; the window holds fewer
            than two samples; no valid change can be computed (no two consecutive
            non-missing readings); or the largest change is below
            ``self.config['control_thresholds']['change_threshold']``.
        """
        print(f"DEBUG - Scanning for significant event for '{control_action_node}' between {start_dt} and {end_dt}.")
        control_col = self.node_to_column.get(control_action_node)
        if not control_col or control_col not in self.data.columns:
            print(f"WARN - Could not find column for {control_action_node}.")
            return start_dt  # Fallback

        window_data = self.data.loc[start_dt:end_dt, control_col]
        # A diff needs at least two samples (this also covers the empty window).
        if len(window_data) < 2:
            print("WARN - Not enough data in the window to find a significant event.")
            return start_dt  # Fallback

        # Absolute change from the previous step; the first entry is always NaN.
        changes = window_data.diff().abs()
        largest_change = changes.max()

        # With no two consecutive valid readings every change is NaN. The
        # threshold comparison below would then be False and idxmax() would
        # either return NaN or raise, depending on the pandas version.
        if pd.isna(largest_change):
            print(f"WARN - No valid consecutive readings for '{control_action_node}' in the window; cannot locate a significant event.")
            return start_dt  # Fallback

        # Check if there was any significant change at all.
        if largest_change < self.config['control_thresholds']['change_threshold']:
            print(f"DEBUG - Control action '{control_action_node}' was stable. Analyzing the start of the period.")
            return start_dt

        # idxmax() returns the timestamp (index) of the first occurrence of the maximum value.
        significant_timestamp = changes.idxmax()

        print(f"DEBUG - Found most significant change at {significant_timestamp}.")
        return significant_timestamp
    
    def answer_query(self, query: str) -> Tuple[str, Union[str, None]] | str | None:
        """
        Answers a natural language query using dialogue state for context,
        NLP parsing, and routing to appropriate analysis.
        """
        from datetime import datetime
        original_query = query
        print(f"\nReceived Query: {query}")
        final_answer = None
        plot_figure = None
        # Track the intent that was successfully executed or attempted
        intent_executed: Literal['time_range','timestamp', 'pattern', 'correlation', 'explain_specific_event', 'evaluate_control_strategy', 'explain_net_effect','analyze_volatility', 'explain_causal_anomaly','explain_sudden_event',
                                 'explain_lagrangian_state', 'explain_control_action','explain_causal_trend','visualize','explain_system_event','explain_trade_off', 'explain_strategy', 'find_and_analyze_event','explain_gradual_trend',
                                 'analyze_profitability_strategy','evaluate_setpoint_strategy','evaluate_day_strategy','analyze_model_discrepancy','explain_obstacle','find_and_explain_reaction','analyze_daily_total','explain_anomaly','explain_disturbance_significance','analyze_control_efficiency'] | str | None = None

        # Ensure dialogue_state exists
        if not hasattr(self, 'dialogue_state'):
            self.dialogue_state = {}
            print("Initialized dialogue_state.")

        # --- 1. PARSE THE QUERY: This is now the single source of truth ---
        resolved_info = self.parse_query_nlp(query)
        if resolved_info.get('error') and resolved_info.get('intent') != 'correlation':
            # Error handling for parser failure (but allow correlation queries to proceed)
            return f"Error during query parsing: {resolved_info.get('error')}"

        print(f"DEBUG - NLP Parse Result (to be used directly): {resolved_info}")

        resolved_intent = resolved_info.get('intent', 'unknown')
        primary_node = resolved_info.get('primary_node')
        query_lower = query.lower()

        # --- 2. INTENT REFINEMENT (Single, clean block) ---
        is_why_question = any(word in query_lower for word in ['why', 'reason', 'cause', 'explain the reason for', 'purpose of', 'what was the reason for'])
        trade_off_keywords = ['prioritizing', 'priority', 'trade-off', 'vs', 'versus', 'balance between']
        is_trade_off_query = any(kw in query_lower for kw in trade_off_keywords)
        is_single_time_query = resolved_info.get('time') and not resolved_info.get('start_time')

        # Find how many state variables were mentioned
        mentioned_states = [node for node in resolved_info.get('target_nodes', []) if node in self.base_state_variables]

        if resolved_intent != 'explain_system_event' and is_trade_off_query and len(mentioned_states) >= 2:
            print("DEBUG - Intent Refinement: Upgrading intent to 'explain_trade_off'.")
            resolved_intent = 'explain_trade_off'
        elif resolved_intent == 'pattern' and len(resolved_info.get('target_nodes', [])) >= 2 and 'trigger' in query_lower and 'change' in query_lower:
            print("DEBUG - Intent Refinement: Upgrading 'pattern' intent to 'explain_system_event' for system-wide change.")
            resolved_intent = 'explain_system_event'
        elif resolved_intent == 'pattern' and primary_node and primary_node in self.control_action_nodes and is_why_question:
            print(f"DEBUG - Intent Refinement: Upgrading 'pattern' intent to 'explain_control_action'.")
            resolved_intent = 'explain_control_action'
        elif 'model' in query_lower and ('mismatch' in query_lower or 'underestimating' in query_lower or 'underestimate' in query_lower):
            print("DEBUG - Intent Refinement: Upgrading intent to 'analyze_model_discrepancy' for model mismatch query.")
            resolved_intent = 'analyze_model_discrepancy'
        elif resolved_intent == 'explain_system_event' and 'morning' in query_lower and ('heating' in query_lower or 'heat' in query_lower) and ('ventilation' in query_lower or 'vent' in query_lower):
            print("DEBUG - Intent Refinement: Upgrading 'explain_system_event' to 'explain_control_action' for morning heating/ventilation event.")
            resolved_intent = 'explain_control_action'

        print(f"DEBUG - Final resolved intent for routing: '{resolved_intent}'")

        # --- 2.5. AUTOMATIC PLOTTING FOR ANY VARIABLE QUESTION ---
        # If user mentions any variables in their question, show plots regardless of intent
        target_nodes = resolved_info.get('target_nodes', [])
        primary_node = resolved_info.get('primary_node')
        is_variable_question = len(target_nodes) > 0 or primary_node is not None
        resolved_time_obj = resolved_info.get('time')
        resolved_date_obj = resolved_info.get('date')
        variable_code = resolved_info.get('variable_code')
        query_term = resolved_info.get('query_term')
        # Generate plots for ANY question that mentions variables and has a date
        should_generate_plot = False
        if is_variable_question and resolved_date_obj:
            should_generate_plot = True
            print(f"DEBUG - Variable Question Detected: Will generate plot for question mentioning variables {target_nodes + ([primary_node] if primary_node else [])}")
        
        # For simple variable queries, still route to visualize intent for dedicated handling
        # But keep timestamp queries as timestamp intent so they get LLM analysis
        is_simple_intent = resolved_intent in ['pattern'] and not any(word in query_lower for word in ['why', 'reason', 'cause', 'explain', 'what caused', 'what led to', 'how', 'when did', 'find'])
        if is_variable_question and is_simple_intent and resolved_date_obj:
            print("DEBUG - Simple Variable Question: Routing to visualize intent.")
            resolved_intent = 'visualize'

        # --- Re-fetch variables after potential intent change ---
        

        # --- 3. Route based on Intent and Event Match ---
        if resolved_intent == 'timestamp' and primary_node and self.kg.G.nodes[primary_node].get('type') == 'Disturbance':
            is_causal_query = any(kw in query_lower for kw in ['significant', 'factor', 'influence', 'impact', 'reason', 'cause'])
            if is_causal_query:
                print("DEBUG - Intent Refinement: Upgrading 'timestamp' to 'explain_disturbance_significance' for a more detailed causal answer.")
                # We will use this new "intent" to select a better prompt later.
                resolved_intent = 'explain_disturbance_significance'
        print(f"DEBUG - resolved_info just before visualization check: {resolved_info}") 
        
        
        if resolved_intent == 'visualize':
            if primary_node and resolved_date_obj:
                intent_executed = 'visualize'
                # Check if this is a comparison question (multiple variables mentioned)
                target_nodes = resolved_info.get('target_nodes', [])
                # Deduplicate by column to avoid plotting the same data multiple times
                seen_columns = set()
                comparison_nodes = []
                for node in target_nodes:
                    if node != primary_node and node in self.node_to_column:
                        col = self.node_to_column[node]
                        if col not in seen_columns:
                            seen_columns.add(col)
                            comparison_nodes.append(node)
                
                if comparison_nodes:
                    # Multi-variable comparison plot
                    plot_fig = self.visualize_data(primary_node, resolved_date_obj, comparison_nodes)
                    chat_message = f"Here is a comparison plot for {query_term or primary_node} and {', '.join([self.node_to_name.get(node, node) for node in comparison_nodes])} on {resolved_date_obj.strftime('%Y-%m-%d')}:"
                else:
                    # Single variable plot
                    plot_fig = self.visualize_data(primary_node, resolved_date_obj)
                    chat_message = f"Here is the plot for {query_term or primary_node} on {resolved_date_obj.strftime('%Y-%m-%d')}:"
                
                # Handle plot_fig being a figure or an error string
                if isinstance(plot_fig, go.Figure):
                    final_answer = chat_message
                    plot_figure = plot_fig
                else:
                    final_answer = str(plot_fig) # It will be an error message string
            else:
                final_answer = "To show a graph, please specify a variable and, if I don't know it, a date."

            self.update_dialogue_state(
                original_query,
                resolved_info.get('intent', 'unknown'),
                resolved_info.get('primary_node'),
                resolved_info.get('variable_code'),
                resolved_info.get('query_term'),
                resolved_info.get('datetime'),
                intent_executed,
                resolved_info.get('pattern_type'),
                plot_generated=bool(plot_figure)  # Pass plot generation status
            )
            return final_answer, plot_figure
        
        elif resolved_intent == 'analyze_volatility':
            intent_executed = 'analyze_volatility'
            target_nodes = resolved_info.get('target_nodes')
            start_time_obj = resolved_info.get('start_time')
            end_time_obj = resolved_info.get('end_time')
            resolved_date_obj = resolved_info.get('date')

            target_node_for_volatility = resolved_info.get('primary_node') 
            
            if target_node_for_volatility and resolved_date_obj:
                # For a full-day query, define the start and end times
                start_dt = datetime.combine(resolved_date_obj, time(0, 0))
                end_dt = datetime.combine(resolved_date_obj, time(23, 59))

                # --- THIS IS THE CRITICAL FIX: CALL THE CORRECT FUNCTION ---
                analysis_result = self.analyze_volatility(target_node_for_volatility, start_dt, end_dt)

                if 'error' in analysis_result:
                    final_answer = f"Error during volatility analysis: {analysis_result['error']}"
                else:
                    # 2. Format the result into a context string for the LLM
                    llm_context_str = self.format_volatility_for_llm(analysis_result)

                    # 3. Send to the LLM with the control action prompt
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'analyze_volatility',
                        resolved_info
                    )
                    
                    # Also generate a plot showing the volatile variable and its likely driver
                    driver_node = analysis_result.get('evidence', {}).get("External Driver of Instability", "Qrad") # Default to Qrad
                    plot_fig = self.visualize_data(target_node_for_volatility, resolved_date_obj, [driver_node])
                    if isinstance(plot_fig, go.Figure):
                        plot_figure = plot_fig
                        final_answer += f"\n\nHere is a plot showing {primary_node} and related variables:"

            elif resolved_date_obj and target_nodes:
                # Full day analysis
                start_dt = datetime.combine(resolved_date_obj, time(0, 0))
                end_dt = datetime.combine(resolved_date_obj, time(23, 59))
                
                # 1. Call the control action analysis function
                primary_node = target_nodes[0] if target_nodes else None
                other_nodes = target_nodes[1:] if len(target_nodes) > 1 else []
                analysis_result = self._analyze_control_action_over_range(primary_node, start_dt, end_dt, other_nodes)

                if 'error' in analysis_result:
                    final_answer = f"Error during volatility analysis: {analysis_result['error']}"
                else:
                    # 2. Format the result
                    llm_context_str = self.format_control_action_context_for_llm(analysis_result)
                    
                    # 3. Send to LLM
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'explain_control_action',
                        resolved_info
                    )
                    
                    # Generate plot
                    plot_variables = [primary_node] + other_nodes
                    plot_fig = self.visualize_data(primary_node, resolved_date_obj, plot_variables[1:])
                    if isinstance(plot_fig, go.Figure):
                        plot_figure = plot_fig
                        final_answer += f"\n\nHere is a plot showing {primary_node} over the full day:"

            else:
                final_answer = "To analyze volatility, I need to know which variables were unstable and over what time period (e.g., 'in the afternoon')."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type'),
                    plot_generated=bool(plot_figure)
                )
            
            return final_answer, plot_figure
        
        elif resolved_intent == 'find_and_analyze_event':
            intent_executed = 'find_and_analyze_event'
            event_type = resolved_info.get('event_type')
            plot_figure = None  # Initialize plot figure
            
            if event_type and resolved_date_obj:
                analysis_result = self.find_and_analyze_extreme_event(resolved_date_obj, event_type)
                if 'error' in analysis_result:
                    final_answer = f"Error finding event: {analysis_result['error']}"
                else:
                    # Handle multiple contexts for multiple analysis variables
                    if 'detailed_contexts' in analysis_result:
                        # Multiple variables case
                        detailed_contexts_str = ""
                        plot_variables = []
                        for var_name, context in analysis_result['detailed_contexts'].items():
                            var_display_name = self.node_to_name.get(var_name, var_name)
                            detailed_contexts_str += f"\n\n**{var_display_name}:**\n{self.format_context_for_llm_query(context)}"
                            plot_variables.append(var_name)
                        
                        llm_context_str = (
                            f"{analysis_result.get('conclusion', '')}\n\n"
                            f"**{analysis_result.get('evidence', {}).get('Context at Event Time', '')}**"
                            f"{detailed_contexts_str}"
                        )
                    else:
                        # Single variable case (backward compatibility)
                        detailed_context_str = self.format_context_for_llm_query(analysis_result['detailed_context'])
                        llm_context_str = (
                            f"{analysis_result.get('conclusion', '')}\n\n"
                            f"**{analysis_result.get('evidence', {}).get('Context at Event Time', '')}**\n"
                            f"{detailed_context_str}"
                        )
                        plot_variables = [analysis_result.get('analysis_variable', 'T')]
                    
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'find_and_analyze_event',
                        resolved_info,
                        attach_hca=False
                    )
                    
                    # Generate plot for the event analysis
                    if resolved_date_obj and plot_variables:
                        try:
                            # Create subplot for multiple variables
                            import plotly.subplots as sp
                            fig = sp.make_subplots(
                                rows=len(plot_variables), 
                                cols=1, 
                                shared_xaxes=True,
                                subplot_titles=[self.node_to_name.get(var, var) for var in plot_variables]
                            )
                            
                            event_timestamp = analysis_result.get('event_timestamp')
                            
                            for i, var in enumerate(plot_variables):
                                var_data = self.data[self.data.index.date == resolved_date_obj]
                                if not var_data.empty:
                                    column_name = self.node_to_column.get(var)
                                    if column_name and column_name in var_data.columns:
                                        fig.add_trace(
                                            go.Scatter(
                                                x=var_data.index.tolist(),
                                                y=var_data[column_name],
                                                mode='lines',
                                                name=self.node_to_name.get(var, var),
                                                showlegend=False
                                            ),
                                            row=i+1, col=1
                                        )
                            
                            fig.update_layout(
                                height=300 * len(plot_variables),
                                title_text=f"System Response to {event_type} on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                showlegend=False
                            )
                            
                            # Add vertical line at event time
                            if event_timestamp:
                                for i in range(len(plot_variables)):
                                    fig.add_vline(
                                        x=event_timestamp,
                                        line=dict(color="red", width=2, dash="dash"),
                                        annotation_text=f"Event at {event_timestamp.strftime('%H:%M')}",
                                        annotation_position="top right",
                                        row=i+1, col=1
                                    )
                            
                            plot_figure = fig
                            final_answer += f"\n\nHere's the system response plot for the event on {resolved_date_obj.strftime('%Y-%m-%d')}:"
                        except Exception as e:
                            print(f"Error creating plot: {e}")
                            plot_figure = None
            else:
                final_answer = "To find and analyze an event, I need to know which event you're interested in and for what day."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type'),
                    plot_generated=bool(plot_figure)
                )
            
            return final_answer, plot_figure
        
        elif resolved_intent == 'analyze_disease_risk':
            intent_executed = 'analyze_disease_risk'
            resolved_date_obj = resolved_info.get('date')
            
            if resolved_date_obj:
                analysis_result = self.analyze_disease_risk(resolved_date_obj)

                if 'error' in analysis_result:
                    final_answer = f"Error during disease risk analysis: {analysis_result['error']}"
                else:
                    # Format the result for LLM
                    llm_context_str = self.format_disease_risk_for_llm(analysis_result)
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'analyze_disease_risk',
                        resolved_info
                    )
                    
                    # Generate plot showing humidity and temperature
                    plot_fig = self.visualize_data('H', resolved_date_obj, ['T'])
                    if isinstance(plot_fig, go.Figure):
                        plot_figure = plot_fig
                        final_answer += f"\n\nHere is a plot showing humidity and temperature conditions:"
            else:
                final_answer = "To assess disease risk, I need a specific date."
        
        elif resolved_intent == 'analyze_profitability_strategy':
            intent_executed = 'analyze_profitability_strategy'
            resolved_date_obj = resolved_info.get('date')
            
            if resolved_date_obj:
                analysis_result = self.analyze_profitability_strategy(resolved_date_obj)

                if 'error' in analysis_result:
                    final_answer = f"Error during analysis: {analysis_result['error']}"
                else:
                    # --- THIS IS THE CLEAN INTEGRATION ---
                    # 1. Call the dedicated formatter
                    llm_context_str = self.format_profitability_for_llm(analysis_result)
                    
                    # 2. Send to the LLM with the dedicated prompt
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'explain_profitability',
                        resolved_info
                    )
            else:
                final_answer = "To analyze the profitability of a strategy, I need a specific date."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type'),
                    plot_generated=bool(plot_figure)
                )
            
            return final_answer, plot_figure
          
        elif resolved_intent == 'analyze_control_efficiency':
            intent_executed = 'analyze_control_efficiency'
            resolved_date_obj = resolved_info.get('date')
            primary_node = resolved_info.get('primary_node')
            
            if resolved_date_obj and primary_node:
                analysis_result = self.analyze_control_efficiency(primary_node, resolved_date_obj)

                if 'error' in analysis_result:
                    final_answer = f"Error during efficiency analysis: {analysis_result['error']}"
                else:
                    # 1. Call the dedicated formatter
                    llm_context_str = self.format_control_efficiency_for_llm(analysis_result)
                    
                    # 2. Send to the LLM with the dedicated prompt
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'explain_efficiency',
                        resolved_info
                    )
            else:
                final_answer = "To analyze control efficiency, I need a specific control action and date."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type'),
                    plot_generated=bool(plot_figure)
                )
            
            return final_answer, plot_figure
          
        elif resolved_intent == 'analyze_daily_total':
            intent_executed = 'analyze_daily_total'
            target_nodes = resolved_info.get('target_nodes')
            
            all_extracted_nodes = resolved_info.get('target_nodes')
            
            # 2. Intelligently FILTER this list for only the nodes relevant to THIS intent.
            #    For a "total" query, we only care about fluxes. A reliable way to identify
            #    them is by checking if their column name contains 'phi_'.
            flux_nodes_for_analysis = [
                node for node in all_extracted_nodes 
                if 'phi_' in self.node_to_column.get(node, '').lower()
            ]
            
            print(f"DEBUG (Intent Router): Filtered nodes for 'analyze_daily_total'. Kept: {flux_nodes_for_analysis}")

            # 3. Proceed with the analysis using ONLY the filtered, relevant nodes.
            if flux_nodes_for_analysis and resolved_date_obj:
                analysis_result = self.analyze_daily_total_and_comparison(
                    flux_nodes_for_analysis,  # Use the filtered list
                    resolved_date_obj
                )
                
                if 'error' in analysis_result:
                    final_answer = f"Error analyzing daily totals: {analysis_result['error']}"
                else:
                    llm_context_str = self.format_daily_total_for_llm(analysis_result)
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'analyze_daily_total',
                        resolved_info
                    )
                    
                    # Generate plot for the flux comparison
                    if resolved_date_obj and len(flux_nodes_for_analysis) > 0:
                        try:
                            if len(flux_nodes_for_analysis) == 1:
                                # Single flux plot
                                plot_fig = self.visualize_data(flux_nodes_for_analysis[0], resolved_date_obj)
                                if isinstance(plot_fig, go.Figure):
                                    plot_figure = plot_fig
                                    final_answer += f"\n\n📊 Here's the plot for {self.node_to_name.get(flux_nodes_for_analysis[0], flux_nodes_for_analysis[0])}:"
                            else:
                                # Multi-flux subplot plot
                                import plotly.subplots as sp
                                fig = sp.make_subplots(
                                    rows=len(flux_nodes_for_analysis),
                                    cols=1,
                                    shared_xaxes=True,
                                    subplot_titles=[self.node_to_name.get(node, node) for node in flux_nodes_for_analysis]
                                )
                                
                                var_data = self.data[self.data.index.date == resolved_date_obj]
                                if not var_data.empty:
                                    for i, node in enumerate(flux_nodes_for_analysis):
                                        column_name = self.node_to_column.get(node)
                                        if column_name and column_name in var_data.columns:
                                            fig.add_trace(
                                                go.Scatter(
                                                    x=var_data.index.tolist(),
                                                    y=var_data[column_name],
                                                    mode='lines',
                                                    name=self.node_to_name.get(node, node),
                                                    showlegend=False
                                                ),
                                                row=i+1, col=1
                                            )
                                
                                fig.update_layout(
                                    height=300 * len(flux_nodes_for_analysis),
                                    title_text=f"Flux Comparison on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                    showlegend=False
                                )
                                
                                plot_figure = fig
                                flux_names = [self.node_to_name.get(node, node) for node in flux_nodes_for_analysis]
                                final_answer += f"\n\n📊 Here's a multi-panel plot comparing {', '.join(flux_names)}:"
                        except Exception as e:
                            print(f"Error creating plot for daily total analysis: {e}")
                            plot_figure = None
            else:
                final_answer = "To calculate and compare daily totals, I need at least two flux variables (like phi_QT_trans) and a specific date."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type'),
                    plot_generated=bool(plot_figure)
                )
            
            return final_answer, plot_figure
        
        elif resolved_intent == 'explain_strategy_shift':
            intent_executed = 'explain_strategy_shift'
            safety_node = resolved_info.get('primary_node')  # The safety action (ventilation/cooling)
            growth_node = resolved_info.get('target_nodes', [None])[0] if resolved_info.get('target_nodes') else None  # The growth action (CO2)
            
            if safety_node and resolved_date_obj and resolved_info.get('time'):
                action_timestamp = datetime.combine(resolved_date_obj, resolved_info.get('time'))
                
                # Check for afternoon strategy shift
                strategy_analysis = self._check_for_afternoon_strategy_shift(action_timestamp)
                
                if strategy_analysis:
                    # Format the strategy shift analysis for LLM
                    llm_context_str = (
                        f"--- Afternoon Strategy Shift Analysis ---\n\n"
                        f"**Primary Reason:** {strategy_analysis.get('primary_reason', 'Not available.')}\n\n"
                        f"**Causal Driver:** {strategy_analysis.get('causal_driver_type', 'Not available.')}\n\n"
                        f"**Supporting Evidence:**\n"
                    )
                    
                    for key, value in strategy_analysis.get('supporting_evidence', {}).items():
                        llm_context_str += f"- **{key}:** {value}\n"
                    
                    # Add observed effects if available
                    if 'observed_effects' in strategy_analysis:
                        llm_context_str += f"\n**Observed Effects:**\n{strategy_analysis['observed_effects']}"
                    
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'explain_strategy_shift',
                        resolved_info,
                        hca_focus_node=safety_node,
                        hca_focus_timestamp=action_timestamp
                    )
                    
                    # Generate plot showing the strategy shift
                    if resolved_date_obj:
                        try:
                            # Create a plot showing the relevant variables around the shift time
                            plot_vars = []
                            if safety_node:
                                plot_vars.append(safety_node)
                            if growth_node:
                                plot_vars.append(growth_node)
                            
                            if plot_vars:
                                var_data = self.data[self.data.index.date == resolved_date_obj]
                                if not var_data.empty:
                                    import plotly.subplots as sp
                                    fig = sp.make_subplots(
                                        rows=len(plot_vars),
                                        cols=1,
                                        shared_xaxes=True,
                                        subplot_titles=[self.node_to_name.get(var, var) for var in plot_vars]
                                    )
                                    
                                    for i, var in enumerate(plot_vars):
                                        column_name = self.node_to_column.get(var)
                                        if column_name and column_name in var_data.columns:
                                            fig.add_trace(
                                                go.Scatter(
                                                    x=var_data.index.tolist(),
                                                    y=var_data[column_name],
                                                    mode='lines',
                                                    name=self.node_to_name.get(var, var),
                                                    showlegend=False
                                                ),
                                                row=i+1, col=1
                                            )
                                    
                                    # Add vertical line at the strategy shift time
                                    for i in range(len(plot_vars)):
                                        fig.add_vline(
                                            x=action_timestamp,
                                            line=dict(color="red", width=2, dash="dash"),
                                            annotation_text="Strategy Shift",
                                            annotation_position="top right",
                                            row=i+1, col=1
                                        )
                                    
                                    fig.update_layout(
                                        height=300 * len(plot_vars),
                                        title_text=f"Strategy Shift at {action_timestamp.strftime('%H:%M')} on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                        showlegend=False
                                    )
                                    
                                    plot_figure = fig
                                    var_names = [self.node_to_name.get(var, var) for var in plot_vars]
                                    final_answer += f"\n\n📊 Here's the strategy shift visualization showing {', '.join(var_names)}:"
                        except Exception as e:
                            print(f"Error creating plot for strategy shift: {e}")
                            plot_figure = None
                else:
                    final_answer = "This doesn't appear to be a classic afternoon strategy shift pattern."
            else:
                final_answer = "To explain a strategy shift, I need a specific time when the shift occurred."
            
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type'),
                    plot_generated=bool(plot_figure)
                )
            
            return final_answer, plot_figure
        
        elif resolved_intent == 'explain_obstacle':
            intent_executed = 'explain_obstacle'
            state_node = resolved_info.get('primary_node')
            start_time_obj = resolved_info.get('start_time')
            end_time_obj = resolved_info.get('end_time')

            if state_node and resolved_date_obj and start_time_obj and end_time_obj:
                start_dt = datetime.combine(resolved_date_obj, start_time_obj)
                end_dt = datetime.combine(resolved_date_obj, end_time_obj)

                # 1. Call the new analysis function
                analysis = self.analyze_physical_obstacle(state_node, start_dt, end_dt)
                
                # 2. Format the result for the LLM
                llm_context = self.format_obstacle_for_llm(analysis)
                
                # 3. Send to the LLM with the new prompt
                final_answer = self.respond_with_llm(
                    llm_context,
                    original_query,
                    'explain_obstacle',
                    resolved_info,
                    hca_focus_node=state_node
                )
            else:
                final_answer = "To explain a physical obstacle, I need to know which variable was changing slowly and over what time period."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type')
                )
            
            return final_answer, plot_figure
            
        elif resolved_intent == 'find_and_explain_reaction':
            intent_executed = 'find_and_explain_reaction'
            disturbance_node = resolved_info.get('primary_node')
            reaction_nodes = resolved_info.get('target_nodes')
            plot_figure = None  # Initialize plot figure
            
            if disturbance_node and reaction_nodes and resolved_date_obj:
                analysis = self.find_and_explain_reaction(disturbance_node, reaction_nodes, resolved_date_obj)
                
                if 'error' in analysis:
                    final_answer = f"Error during analysis: {analysis['error']}"
                else:
                    # We need a dedicated formatter for this new analysis structure
                    llm_context = f"Event Description: {analysis['event']['description']}\n\nReactions:\n"
                    for reaction in analysis['reactions']:
                        llm_context += (f"- System: {reaction['name']}\n"
                                        f"  - Value Before: {reaction['value_before']:.2f}\n"
                                        f"  - Value At Event: {reaction['value_at']:.2f}\n")
                    
                    final_answer = self.respond_with_llm(
                        llm_context,
                        original_query,
                        'find_and_explain_reaction',
                        resolved_info,
                        hca_focus_node=disturbance_node,
                        hca_focus_timestamp=analysis.get('event', {}).get('timestamp')
                    )
                    
                    # Generate plot for the reaction analysis
                    if resolved_date_obj:
                        try:
                            # Create subplot for disturbance + reaction systems
                            plot_variables = [disturbance_node] + reaction_nodes
                            import plotly.subplots as sp
                            fig = sp.make_subplots(
                                rows=len(plot_variables), 
                                cols=1, 
                                shared_xaxes=True,
                                subplot_titles=[self.node_to_name.get(var, var) for var in plot_variables]
                            )
                            
                            event_timestamp = analysis.get('event', {}).get('timestamp')
                            
                            for i, var in enumerate(plot_variables):
                                var_data = self.data[self.data.index.date == resolved_date_obj]
                                if not var_data.empty:
                                    column_name = self.node_to_column.get(var)
                                    if column_name and column_name in var_data.columns:
                                        fig.add_trace(
                                            go.Scatter(
                                                x=var_data.index.tolist(),
                                                y=var_data[column_name],
                                                mode='lines',
                                                name=self.node_to_name.get(var, var),
                                                showlegend=False
                                            ),
                                            row=i+1, col=1
                                        )
                            
                            fig.update_layout(
                                height=300 * len(plot_variables),
                                title_text=f"System Reactions to {self.node_to_name.get(disturbance_node, disturbance_node)} Event on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                showlegend=False
                            )
                            
                            # Add vertical line at event time
                            if event_timestamp:
                                for i in range(len(plot_variables)):
                                    fig.add_vline(
                                        x=event_timestamp,
                                        line=dict(color="red", width=2, dash="dash"),
                                        annotation_text=f"Event at {event_timestamp.strftime('%H:%M')}",
                                        annotation_position="top right",
                                        row=i+1, col=1
                                    )
                            
                            plot_figure = fig
                            final_answer += f"\n\nHere's the system reaction plot for the event on {resolved_date_obj.strftime('%Y-%m-%d')}:"
                        except Exception as e:
                            print(f"Error creating plot: {e}")
                            plot_figure = None
            else:
                final_answer = "To find an event and explain a reaction, I need a trigger variable (like solar radiation) and at least one system that reacted (like CO2 injection)."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type'),
                    plot_generated=bool(plot_figure)
                )
            
            return final_answer, plot_figure

        elif resolved_intent == 'evaluate_day_strategy':
            intent_executed = 'evaluate_day_strategy'
            
            if resolved_date_obj:
                # Call your new analysis function
                analysis_result = self.analyze_day_strategy(
                    resolved_date_obj,
                    resolved_info.get('target_nodes', [])
                )
                
                if 'error' in analysis_result:
                    final_answer = f"Error analyzing the daily strategy: {analysis_result['error']}"
                else:
                    # Use the new formatter to create the context
                    llm_context_str = self.format_day_strategy_for_llm(analysis_result,original_query)
                    
                    # You need a dedicated prompt for this high-level summary
                    # Let's call it 'summarize_day_strategy'
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'summarize_day_strategy',
                        resolved_info
                    )
                    if resolved_date_obj and mentioned_states:
                        # Create a multi-panel plot showing the variables involved in the trade-off
                        import plotly.subplots as sp
                        fig = sp.make_subplots(
                            rows=len(mentioned_states),
                            cols=1,
                            shared_xaxes=True,
                            subplot_titles=[self.node_to_name.get(var, var) for var in mentioned_states]
                        )

                        day_data = self.data[self.data.index.date == resolved_date_obj]
                        if not day_data.empty:
                            for i, var in enumerate(mentioned_states):
                                column_name = self.node_to_column.get(var)
                                if column_name and column_name in day_data.columns:
                                    fig.add_trace(
                                        go.Scatter(
                                            x=day_data.index.tolist(),
                                            y=day_data[column_name],
                                            mode='lines',
                                            name=self.node_to_name.get(var, var),
                                            showlegend=False
                                        ),
                                        row=i+1, col=1
                                    )

                            fig.update_layout(
                                height=300 * len(mentioned_states),
                                title_text=f"Day Strategy Analysis: {', '.join([self.node_to_name.get(var, var) for var in mentioned_states])} on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                showlegend=False
                            )

                            plot_figure = fig
                            final_answer += f"\n\n📊 Here's a plot showing the variables involved in the day strategy analysis:"
                    # Check if the question mentions specific variables that should be visualized
                    target_nodes = resolved_info.get('target_nodes', [])
                    mentioned_co2 = any('co2' in node.lower() or 'uC' in node for node in target_nodes)
                    mentioned_co2_in_query = 'co₂' in query_lower or 'co2' in query_lower or 'carbon' in query_lower
                    
                    if mentioned_co2 or mentioned_co2_in_query:
                        # Generate CO2 injection plot
                        co2_plot = self.visualize_data('uC', resolved_date_obj)
                        if isinstance(co2_plot, go.Figure):
                            plot_figure = co2_plot
                            final_answer += "\n\nHere's the CO2 injection pattern for that day:"
            else:
                final_answer = "To evaluate the day's strategy, I need a specific date."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type')
                )
            
            return final_answer, plot_figure

        elif resolved_intent == 'explain_lagrangian_state':
            intent_executed = 'explain_lagrangian_state'
            lagrangian_nodes = resolved_info.get('target_nodes', [])
            
            if lagrangian_nodes and resolved_date_obj:
                analysis_result = self.analyze_lagrangian_state(lagrangian_nodes, resolved_date_obj)
                llm_context_str = self.format_lagrangian_state_for_llm(analysis_result)
                final_answer = self.respond_with_llm(
                    llm_context_str,
                    original_query,
                    'explain_lagrangian_state',
                    resolved_info
                )
                if resolved_date_obj and mentioned_states:
                        # Create a multi-panel plot with one row per mentioned state variable for the resolved date
                        import plotly.subplots as sp
                        fig = sp.make_subplots(
                            rows=len(mentioned_states),
                            cols=1,
                            shared_xaxes=True,
                            subplot_titles=[self.node_to_name.get(var, var) for var in mentioned_states]
                        )

                        day_data = self.data[self.data.index.date == resolved_date_obj]
                        if not day_data.empty:
                            for i, var in enumerate(mentioned_states):
                                column_name = self.node_to_column.get(var)
                                if column_name and column_name in day_data.columns:
                                    fig.add_trace(
                                        go.Scatter(
                                            x=day_data.index.tolist(),
                                            y=day_data[column_name],
                                            mode='lines',
                                            name=self.node_to_name.get(var, var),
                                            showlegend=False
                                        ),
                                        row=i+1, col=1
                                    )

                            fig.update_layout(
                                height=300 * len(mentioned_states),
                                title_text=f"Lagrangian Analysis: {', '.join([self.node_to_name.get(var, var) for var in mentioned_states])} on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                showlegend=False
                            )

                            plot_figure = fig
                            final_answer += f"\n\n📊 Here's a plot showing the variables involved in the lagrangian analysis:"
            else:
                final_answer = "To explain a Lagrangian's state, I need the variable name and a date."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type')
                )
            
            return final_answer, plot_figure
                  
        elif resolved_intent == 'explain_anomaly':
            intent_executed = 'explain_anomaly'
            primary_node = resolved_info.get('primary_node')
            resolved_date_obj = resolved_info.get('date')
            
            is_range_query = resolved_info.get('start_time') and resolved_info.get('end_time')
            
            # --- PATH 1: Anomaly over a TIME RANGE (call the new PATTERN function) ---
            if is_range_query and primary_node and resolved_date_obj:
                print("DEBUG - Anomaly Intent: Range query detected. Routing to Pattern Analysis.")
                
                analysis_result = self.analyze_setpoint_anomaly_PATTERN(
                    setpoint_node=primary_node,
                    date=resolved_date_obj,
                    start_time=resolved_info.get('start_time'),
                    end_time=resolved_info.get('end_time')
                )
                
                if 'error' in analysis_result:
                    final_answer = analysis_result['error']
                else:
                    llm_context = self.format_setpoint_strategy_for_llm(analysis_result)
                    final_answer = self.respond_with_llm(
                        llm_context,
                        original_query,
                        'explain_anomaly',
                        resolved_info,
                        hca_focus_node=primary_node
                    )

            # --- PATH 2: Anomaly at a SINGLE TIME (could be a fault or a different strategy) ---
            elif is_single_time_query and primary_node and resolved_date_obj:
                print("DEBUG - Anomaly Intent: Single time query detected. Routing to single-point analysis pipeline.")
                timestamp = datetime.combine(resolved_date_obj, resolved_info.get('time'))
                
                # Run the hierarchical check for single-point anomalies
                analysis_result = self._check_for_midnight_reset_anomaly(timestamp)
                
                if not analysis_result:
                    analysis_result = self.analyze_system_fault(primary_node, timestamp)
                
                if not analysis_result or "No Specific Fault" in analysis_result.get('fault_type', ''):
                    setpoint_node_to_analyze = f"{primary_node}_ref"
                    analysis_result = self.analyze_setpoint_anomaly_strategy(setpoint_node_to_analyze, timestamp)
                
                # Now, format the result from whichever expert succeeded
                if 'error' in analysis_result:
                    final_answer = analysis_result['error']
                elif 'fault_type' in analysis_result and "No Specific Fault" not in analysis_result['fault_type']:
                    llm_context = self.format_fault_for_llm(analysis_result)
                    final_answer = self.respond_with_llm(
                        llm_context,
                        original_query,
                        'explain_anomaly_fault',
                        resolved_info,
                        hca_focus_node=primary_node,
                        hca_focus_timestamp=timestamp
                    )
                elif 'strategy_name' in analysis_result:
                    llm_context = self.format_setpoint_strategy_for_llm(analysis_result)
                    final_answer = self.respond_with_llm(
                        llm_context,
                        original_query,
                        'evaluate_setpoint_strategy',
                        resolved_info,
                        hca_focus_node=primary_node,
                        hca_focus_timestamp=timestamp
                    )
                else:
                    final_answer = "I analyzed the data at that specific moment but could not determine a specific cause for the anomaly."

            else:
                final_answer = "To analyze an unusual pattern, please provide a specific time for a fault, or a time range for a strategy."
            
                if resolved_date_obj and mentioned_states:
                            # Create a multi-panel plot with one row per mentioned state variable for the resolved date
                            import plotly.subplots as sp
                            fig = sp.make_subplots(
                                rows=len(mentioned_states),
                                cols=1,
                                shared_xaxes=True,
                                subplot_titles=[self.node_to_name.get(var, var) for var in mentioned_states]
                            )

                            day_data = self.data[self.data.index.date == resolved_date_obj]
                            if not day_data.empty:
                                for i, var in enumerate(mentioned_states):
                                    column_name = self.node_to_column.get(var)
                                    if column_name and column_name in day_data.columns:
                                        fig.add_trace(
                                            go.Scatter(
                                                x=day_data.index.tolist(),
                                                y=day_data[column_name],
                                                mode='lines',
                                                name=self.node_to_name.get(var, var),
                                                showlegend=False
                                            ),
                                            row=i+1, col=1
                                        )

                                fig.update_layout(
                                    height=300 * len(mentioned_states),
                                    title_text=f"Anomaly Analysis: {', '.join([self.node_to_name.get(var, var) for var in mentioned_states])} on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                    showlegend=False
                                )

                                plot_figure = fig
                                final_answer += f"\n\n📊 Here's a plot showing the variables involved in the anomaly analysis:"

                # Fallback: If the user asks about an anomaly but gives ambiguous information.
                else:
                    final_answer = "To analyze an unusual pattern, please be more specific. For a strategy, provide a time range (e.g., 'from 10:00 to 12:00'). For a specific event, provide a single time (e.g., 'at 13:40')."
        
            # Record this turn in the dialogue state, timestamped with the start time, the single time, or midnight
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state and final_time_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=resolved_info.get('variable_code'),
                    resolved_query_term=resolved_info.get('query_term'),
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type')
                )
            
            return final_answer, plot_figure
        
        elif resolved_intent == 'analyze_model_discrepancy':
            intent_executed = 'analyze_model_discrepancy'
            all_mentioned_nodes = resolved_info.get('target_nodes', []) + [resolved_info.get('primary_node')]
            if resolved_info.get('variable_code'):
                all_mentioned_nodes.append(resolved_info.get('variable_code'))
    
            # 1. Intelligently find the STATE node.
            # The user's query is about a state (like Temperature), which we can infer from
            # the mention of its equality Lagrangian (e.g., 'T_eq').
            state_node = None
            for node in all_mentioned_nodes:
                if node and '_eq' in node: # e.g., if we find 'T_eq'
                    state_node = node.split('_')[0] # Extract the base state 'T'
                    break
            # If not found that way, try finding a base state directly
            if not state_node:
                for node in all_mentioned_nodes:
                    if node and node in self.base_state_variables:
                        state_node = node
                        break

            # 2. Intelligently find the CONTROL node.
            # The user mentioned 'heat_ref' or 'heating', which maps to 'uQh'.
            control_node = None
            for node in all_mentioned_nodes:
                if node:
                    base_code = self.get_control_action_base_code(node) # Handles 'heat_ref' -> 'uQh'
                    if base_code in self.control_action_nodes:
                        control_node = base_code
                        break
            
            # --- INTELLIGENT TIME WINDOW CREATION ---
            query_lower = resolved_info.get('query', '').lower()
            
            # Check for explicit datetimes from parser
            datetimes = resolved_info.get('datetimes', [])
            if len(datetimes) >= 2:
                start_dt = datetimes[0]['datetime']
                end_dt = datetimes[1]['datetime']
                print(f"DEBUG (Model Discrepancy Router): Using explicit time range from parser: {start_dt} to {end_dt}")
            elif len(datetimes) == 1:
                # Single timestamp - create a window around it
                timestamp = datetimes[0]['datetime']
                start_dt = timestamp - timedelta(minutes=30)
                end_dt = timestamp + timedelta(minutes=30)
                print(f"DEBUG (Model Discrepancy Router): Using window around single timestamp: {start_dt} to {end_dt}")
            else:
                # If the parser found an explicit range, use it.
                if resolved_info.get('start_time') and resolved_info.get('end_time'):
                    start_time_obj = resolved_info.get('start_time')
                    end_time_obj = resolved_info.get('end_time')
                    start_dt = datetime.combine(resolved_date_obj, start_time_obj)
                    end_dt = datetime.combine(resolved_date_obj, end_time_obj)
                    print(f"DEBUG (Model Discrepancy Router): Using start_time/end_time from parser: {start_dt} to {end_dt}")
                else:
                    # Check for time qualifiers in the query
                    if 'afternoon' in query_lower:
                        # Use afternoon hours: 12:00 to 18:00
                        start_time_obj = time(12, 0)
                        end_time_obj = time(18, 0)
                        print("DEBUG (Model Discrepancy Router): Using afternoon hours (12:00-18:00) for analysis.")
                    elif 'morning' in query_lower:
                        # Use morning hours: 06:00 to 12:00
                        start_time_obj = time(6, 0)
                        end_time_obj = time(12, 0)
                        print("DEBUG (Model Discrepancy Router): Using morning hours (06:00-12:00) for analysis.")
                    elif 'evening' in query_lower or 'night' in query_lower:
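                        # Use evening/night hours: 18:00 to 06:00.
                        # NOTE(review): both times are combined with the same date below, so end_dt (06:00)
                        # falls before start_dt (18:00); the window does not roll over to the next day.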
                        start_time_obj = time(18, 0)
                        end_time_obj = time(6, 0)
                        print("DEBUG (Model Discrepancy Router): Using evening/night hours (18:00-06:00) for analysis.")
                    else:
                        # No time qualifier in the query: fall back to a 02:00-06:00 window
                        print("DEBUG (Model Discrepancy Router): No explicit time range found. Creating default 'night' window (02:00-06:00).")
                        start_time_obj = time(2, 0)
                        end_time_obj = time(6, 0)
                    
                    start_dt = datetime.combine(resolved_date_obj, start_time_obj)
                    end_dt = datetime.combine(resolved_date_obj, end_time_obj)
                
            if control_node and state_node and resolved_date_obj:
                analysis = self.analyze_model_discrepancy(control_node, state_node, start_dt, end_dt, original_query)
                llm_context = analysis

                final_answer = self.respond_with_llm(
                    llm_context,
                    original_query,
                    'analyze_model_discrepancy',
                    resolved_info,
                    hca_focus_node=state_node,
                    hca_focus_timestamp=start_dt + (end_dt - start_dt) / 2 if start_dt and end_dt else None
                )
                if resolved_date_obj and mentioned_states:
                        # Create a multi-panel plot with one row per mentioned state variable for the resolved date
                        import plotly.subplots as sp
                        fig = sp.make_subplots(
                            rows=len(mentioned_states),
                            cols=1,
                            shared_xaxes=True,
                            subplot_titles=[self.node_to_name.get(var, var) for var in mentioned_states]
                        )

                        day_data = self.data[self.data.index.date == resolved_date_obj]
                        if not day_data.empty:
                            for i, var in enumerate(mentioned_states):
                                column_name = self.node_to_column.get(var)
                                if column_name and column_name in day_data.columns:
                                    fig.add_trace(
                                        go.Scatter(
                                            x=day_data.index.tolist(),
                                            y=day_data[column_name],
                                            mode='lines',
                                            name=self.node_to_name.get(var, var),
                                            showlegend=False
                                        ),
                                        row=i+1, col=1
                                    )

                            fig.update_layout(
                                height=300 * len(mentioned_states),
                                title_text=f"Model Discrepancy Analysis: {', '.join([self.node_to_name.get(var, var) for var in mentioned_states])} on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                showlegend=False
                            )

                            plot_figure = fig
                            final_answer += f"\n\n📊 Here's a plot showing the variables involved in the model discrepancy analysis:"
            else:
                final_answer = "To diagnose a model discrepancy, I need a control action, a state variable, and a time range to compare."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type')
                )
            
            return final_answer, plot_figure
            
        elif resolved_intent == 'explain_net_effect':
            intent_executed = 'explain_net_effect'
            flux_nodes = resolved_info.get('target_nodes')
            target_control = resolved_info.get('primary_node')
     
            start_dt, end_dt = None, None
            
            # Scenario 1: User provided an explicit range
            if resolved_info.get('start_time') and resolved_info.get('end_time'):
                start_time_obj = resolved_info.get('start_time')
                end_time_obj = resolved_info.get('end_time')
                start_dt = datetime.combine(resolved_date_obj, start_time_obj)
                end_dt = datetime.combine(resolved_date_obj, end_time_obj)

            # Scenario 2: User provided a single timestamp
            elif resolved_info.get('time'):
                # Create a small, sensible window around the single point for analysis (e.g., +/- 15 mins)
                center_time = resolved_info.get('time')
                center_dt = datetime.combine(resolved_date_obj, center_time)
                start_dt = center_dt - timedelta(minutes=15)
                end_dt = center_dt + timedelta(minutes=15)
                print(f"DEBUG (Net Effect): Single timestamp detected. Creating analysis window from {start_dt.time()} to {end_dt.time()}.")

            # Now, proceed with analysis if we have a valid window
            if all([flux_nodes, target_control, resolved_date_obj, start_dt, end_dt]):
                analysis_result = self.analyze_competing_fluxes(flux_nodes, target_control, start_dt, end_dt)

                if 'error' in analysis_result:
                    final_answer = f"Error analyzing competing forces: {analysis_result['error']}"
                else:
                    # Build the LLM context string from the conclusion and evidence returned by the analysis
                    evidence = analysis_result.get('evidence', {})
                    competing_forces_str = ""
                    for force_data in evidence.get("Competing Forces", {}).values():
                        competing_forces_str += f"- The **{force_data['name']}** provided a **{force_data['effect']}** effect of approximately **{force_data['average_value']:.2f} W/m²**.\n"
                    
                    llm_context_str = (
                        f"--- Analysis of Competing Forces ---\n\n"
                        f"**Conclusion:** {analysis_result.get('conclusion', 'Not available.')}\n\n"
                        f"**Supporting Evidence:**\n"
                        f"**1. The Competing Forces:**\n{competing_forces_str}"
                        f"**2. The Net Effect:** {evidence.get('Net Effect', 'N/A')}\n"
                        f"**3. The System's Response:** {evidence.get('System Response', 'N/A')}"
                    )
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'explain_net_effect',
                        resolved_info,
                        hca_focus_node=target_control,
                        hca_focus_timestamp=start_dt + (end_dt - start_dt) / 2 if start_dt and end_dt else None
                    )
                    if resolved_date_obj and mentioned_states:
                        # Create a multi-panel plot with one row per mentioned state variable for the resolved date
                        import plotly.subplots as sp
                        fig = sp.make_subplots(
                            rows=len(mentioned_states),
                            cols=1,
                            shared_xaxes=True,
                            subplot_titles=[self.node_to_name.get(var, var) for var in mentioned_states]
                        )

                        day_data = self.data[self.data.index.date == resolved_date_obj]
                        if not day_data.empty:
                            for i, var in enumerate(mentioned_states):
                                column_name = self.node_to_column.get(var)
                                if column_name and column_name in day_data.columns:
                                    fig.add_trace(
                                        go.Scatter(
                                            x=day_data.index.tolist(),
                                            y=day_data[column_name],
                                            mode='lines',
                                            name=self.node_to_name.get(var, var),
                                            showlegend=False
                                        ),
                                        row=i+1, col=1
                                    )

                            fig.update_layout(
                                height=300 * len(mentioned_states),
                                title_text=f"Net Effect Analysis: {', '.join([self.node_to_name.get(var, var) for var in mentioned_states])} on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                showlegend=False
                            )

                            plot_figure = fig
                            final_answer += f"\n\n📊 Here's a plot showing the variables involved in the control analysis:"
            else:
                final_answer = "To analyze the net effect of competing forces, please specify at least two fluxes, a target control system, and a time or time range."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type')
                )
            
            return final_answer, plot_figure
        
        elif resolved_intent == 'explain_oscillation':
            intent_executed = 'explain_oscillation'
            variable_node = resolved_info.get('primary_node')
            start_time_obj = resolved_info.get('start_time')
            end_time_obj = resolved_info.get('end_time')
            time_obj = resolved_info.get('time')

            # Handle single time by creating a 2-hour window around it
            if not start_time_obj and not end_time_obj and time_obj:
                # Create a 2-hour window around the specified time
                time_dt = datetime.combine(resolved_date_obj, time_obj) if resolved_date_obj else datetime.combine(datetime.now().date(), time_obj)
                start_dt = time_dt - timedelta(hours=1)
                end_dt = time_dt + timedelta(hours=1)
            elif variable_node and resolved_date_obj and start_time_obj and end_time_obj:
                start_dt = datetime.combine(resolved_date_obj, start_time_obj)
                end_dt = datetime.combine(resolved_date_obj, end_time_obj)
            else:
                start_dt = end_dt = None

            if variable_node and start_dt and end_dt:
                analysis = self.analyze_oscillation(variable_node, start_dt, end_dt)
                
                llm_context = self.format_oscillation_for_llm(analysis)

                final_answer = self.respond_with_llm(
                    llm_context,
                    original_query,
                    'explain_oscillation',
                    resolved_info,
                    hca_focus_node=variable_node,
                    hca_focus_timestamp=start_dt + (end_dt - start_dt) / 2
                )
                if resolved_date_obj and mentioned_states:
                        # Create a multi-panel plot with one row per mentioned state variable for the resolved date
                        import plotly.subplots as sp
                        fig = sp.make_subplots(
                            rows=len(mentioned_states),
                            cols=1,
                            shared_xaxes=True,
                            subplot_titles=[self.node_to_name.get(var, var) for var in mentioned_states]
                        )

                        day_data = self.data[self.data.index.date == resolved_date_obj]
                        if not day_data.empty:
                            for i, var in enumerate(mentioned_states):
                                column_name = self.node_to_column.get(var)
                                if column_name and column_name in day_data.columns:
                                    fig.add_trace(
                                        go.Scatter(
                                            x=day_data.index.tolist(),
                                            y=day_data[column_name],
                                            mode='lines',
                                            name=self.node_to_name.get(var, var),
                                            showlegend=False
                                        ),
                                        row=i+1, col=1
                                    )

                            fig.update_layout(
                                height=300 * len(mentioned_states),
                                title_text=f"Oscillation Analysis: {', '.join([self.node_to_name.get(var, var) for var in mentioned_states])} on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                showlegend=False
                            )

                            plot_figure = fig
                            final_answer += f"\n\n📊 Here's a plot showing the variables involved in the oscillation analysis:"
            else:
                final_answer = "To analyze an oscillation, I need the variable name and a specific time range."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type')
                )
            
            return final_answer, plot_figure
        
        elif resolved_intent == 'evaluate_control_strategy':
            intent_executed = 'evaluate_control_strategy'
            start_time_obj = resolved_info.get('start_time')
            end_time_obj = resolved_info.get('end_time')
            control_node = resolved_info.get('primary_node')
            target_nodes = resolved_info.get('target_nodes')
            resolved_date_obj = resolved_info.get('date')
            if all([control_node, target_nodes, resolved_date_obj, start_time_obj, end_time_obj]):
                start_dt = datetime.combine(resolved_date_obj, start_time_obj)
                end_dt = datetime.combine(resolved_date_obj, end_time_obj)
                analysis_result = self.evaluate_control_strategy(control_node, target_nodes, start_dt, end_dt)
                if 'error' in analysis_result:
                    final_answer = f"Error during strategy evaluation: {analysis_result['error']}"
                else:
                    evidence = analysis_result.get('evidence', {})
                    evidence_str = "\n".join([f"- **{key}:** {value}" for key, value in evidence.items()])
                    llm_context_str = (
                        f"--- Control Strategy Performance Evaluation ---\n\n"
                        f"**Conclusion:** {analysis_result.get('conclusion', 'Not available.')}\n\n"
                        f"**Supporting Evidence:**\n{evidence_str}"
                    )
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'evaluate_control_strategy',
                        resolved_info,
                        hca_focus_node=control_node,
                        hca_focus_timestamp=start_dt + (end_dt - start_dt) / 2
                    )
                    if resolved_date_obj and mentioned_states:
                        # Create a multi-panel plot with one row per mentioned state variable for the resolved date
                        import plotly.subplots as sp
                        fig = sp.make_subplots(
                            rows=len(mentioned_states),
                            cols=1,
                            shared_xaxes=True,
                            subplot_titles=[self.node_to_name.get(var, var) for var in mentioned_states]
                        )

                        day_data = self.data[self.data.index.date == resolved_date_obj]
                        if not day_data.empty:
                            for i, var in enumerate(mentioned_states):
                                column_name = self.node_to_column.get(var)
                                if column_name and column_name in day_data.columns:
                                    fig.add_trace(
                                        go.Scatter(
                                            x=day_data.index.tolist(),
                                            y=day_data[column_name],
                                            mode='lines',
                                            name=self.node_to_name.get(var, var),
                                            showlegend=False
                                        ),
                                        row=i+1, col=1
                                    )

                            fig.update_layout(
                                height=300 * len(mentioned_states),
                                title_text=f"Control Analysis: {', '.join([self.node_to_name.get(var, var) for var in mentioned_states])} on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                showlegend=False
                            )

                            plot_figure = fig
                            final_answer += f"\n\n📊 Here's a plot showing the variables involved in the analysis:"
            else:
                final_answer = "To evaluate a control strategy, I need the control action, its target, and a time range."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type')
                )
            
            return final_answer, plot_figure
            
        elif resolved_intent == 'explain_trade_off':
            intent_executed = 'explain_trade_off'
            start_time_obj = resolved_info.get('start_time')
            end_time_obj = resolved_info.get('end_time')
            mentioned_states = [self.column_to_node.get(node, node) for node in resolved_info.get('target_nodes', [])]
            query_lower = resolved_info.get('query', '').lower()
            
            # Handle cases where no specific time range is provided
            if len(mentioned_states) >= 2 and resolved_date_obj and not start_time_obj and not end_time_obj:
                # Check for time qualifiers in the query
                if 'afternoon' in query_lower:
                    # Use afternoon hours: 12:00 to 18:00
                    start_time_obj = time(12, 0)
                    end_time_obj = time(18, 0)
                    print("DEBUG - Trade-off analysis: Using afternoon hours (12:00-18:00) for analysis")
                elif 'morning' in query_lower:
                    # Use morning hours: 06:00 to 12:00
                    start_time_obj = time(6, 0)
                    end_time_obj = time(12, 0)
                    print("DEBUG - Trade-off analysis: Using morning hours (06:00-12:00) for analysis")
                elif 'evening' in query_lower or 'night' in query_lower:
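                    # Use evening/night hours: 18:00 to 06:00.
                    # NOTE(review): end_dt is later combined with the same date as start_dt,
                    # so this window ends before it starts — confirm the intended next-day rollover.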
                    start_time_obj = time(18, 0)
                    end_time_obj = time(6, 0)
                    print("DEBUG - Trade-off analysis: Using evening/night hours (18:00-06:00) for analysis")
                else:
                    # Use whole day: 00:00 to 23:59
                    start_time_obj = time(0, 0)
                    end_time_obj = time(23, 59)
                    print("DEBUG - Trade-off analysis: Using whole day (00:00-23:59) for analysis")
            
            if len(mentioned_states) >= 2 and resolved_date_obj and start_time_obj and end_time_obj:
                start_dt = datetime.combine(resolved_date_obj, start_time_obj)
                end_dt = datetime.combine(resolved_date_obj, end_time_obj)

                # The analysis function is already smart enough to handle both scenarios
                analysis_result = self.analyze_trade_off(mentioned_states, start_dt, end_dt)

                if 'error' in analysis_result:
                    final_answer = f"Error during trade-off analysis: {analysis_result['error']}"
                else:
                    evidence = analysis_result.get('evidence', {})
                    evidence_str = "\n".join([f"- **{key}:** {value}" for key, value in evidence.items()])
                    llm_context_str = (
                        f"--- Controller Strategy Evaluation ---\n\n"
                        f"**Conclusion:** {analysis_result.get('conclusion', 'Not available.')}\n\n"
                        f"**Supporting Evidence:**\n{evidence_str}"
                    )
                    final_prompt_intent = 'evaluate_trade_off' if 'B' in mentioned_states else 'explain_trade_off'

                    print(f"DEBUG - Routing to LLM with specialized prompt: '{final_prompt_intent}'")

                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        final_prompt_intent,
                        resolved_info,
                        hca_focus_node=mentioned_states[0] if mentioned_states else None,
                        hca_focus_timestamp=start_dt + (end_dt - start_dt) / 2
                    )
                    
                    # Generate plot for the trade-off analysis variables
                    if resolved_date_obj and mentioned_states:
                        # Create a multi-panel plot showing the variables involved in the trade-off
                        import plotly.subplots as sp
                        fig = sp.make_subplots(
                            rows=len(mentioned_states),
                            cols=1,
                            shared_xaxes=True,
                            subplot_titles=[self.node_to_name.get(var, var) for var in mentioned_states]
                        )

                        day_data = self.data[self.data.index.date == resolved_date_obj]
                        if not day_data.empty:
                            for i, var in enumerate(mentioned_states):
                                column_name = self.node_to_column.get(var)
                                if column_name and column_name in day_data.columns:
                                    fig.add_trace(
                                        go.Scatter(
                                            x=day_data.index.tolist(),
                                            y=day_data[column_name],
                                            mode='lines',
                                            name=self.node_to_name.get(var, var),
                                            showlegend=False
                                        ),
                                        row=i+1, col=1
                                    )

                            fig.update_layout(
                                height=300 * len(mentioned_states),
                                title_text=f"Trade-off Analysis: {', '.join([self.node_to_name.get(var, var) for var in mentioned_states])} on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                showlegend=False
                            )

                            plot_figure = fig
                            final_answer += f"\n\n📊 Here's a plot showing the variables involved in the trade-off analysis:"
            else:
                final_answer = "To analyze a trade-off, please mention at least two variables and provide a specific time range."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time') or time(0, 0)
            if final_date_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type')
                )
            
            return final_answer, plot_figure
           
        elif resolved_intent == 'explain_control_action':
            intent_executed = resolved_intent
            action_analysis_results = None
            start_time = resolved_info.get('start_time')
            end_time = resolved_info.get('end_time')
            resolved_date_obj = resolved_info.get('date')
            
            # Routing: choose the analyzer by how much time information was resolved
            # (explicit range > single time > date only).
            if primary_node and resolved_date_obj and start_time and end_time:
                # PRIORITY 1: A time range was specified. Use the new aggregate analyzer.
                start_dt = datetime.combine(resolved_date_obj, start_time)
                end_dt = datetime.combine(resolved_date_obj, end_time)
                print(f"Executing AGGREGATE analysis for RANGE query from {start_dt} to {end_dt}")
                other_nodes_in_query = [node for node in resolved_info.get('target_nodes', []) if node != primary_node]
                
                action_analysis_results = self._analyze_control_action_over_range(
                    primary_node, start_dt, end_dt,other_nodes_in_query)

            elif primary_node and resolved_date_obj and resolved_info.get('time'):
                # PRIORITY 2 (FALLBACK): A single, specific time was specified. Use the single-event analyzer.
                action_time = resolved_info.get('time') or time(0, 0)
                action_timestamp = datetime.combine(resolved_date_obj, action_time)
                print(f"Executing SINGLE-EVENT analysis for timestamp: {action_timestamp}")
                action_analysis_results = self.analyze_control_action_event(
                    control_action_node=primary_node,
                    timestamp=action_timestamp,
                    original_query=original_query,
                    ablation_mode=self.ablation_mode
                )

            elif primary_node and resolved_date_obj:
                # PRIORITY 3: Only date specified, analyze full day
                start_dt = datetime.combine(resolved_date_obj, time(0, 0))
                end_dt = datetime.combine(resolved_date_obj, time(23, 59))
                print(f"Executing FULL DAY analysis for {resolved_date_obj} from {start_dt} to {end_dt}")
                other_nodes_in_query = [node for node in resolved_info.get('target_nodes', []) if node != primary_node]
                
                action_analysis_results = self._analyze_control_action_over_range(
                    primary_node, start_dt, end_dt, other_nodes_in_query)
            
            # Now, format and send the results from whichever analysis was performed.
            if action_analysis_results:
                print("\n" + "="*20 + " FULL ANALYSIS RESULT (before formatting control action event) " + "="*20)
                import json
                print(json.dumps(action_analysis_results, indent=2, default=str))
                print("="*75 + "\n")

                # PHYSICAL PLAUSIBILITY VALIDATION - Before LLM formatting
                if 'error' not in action_analysis_results:
                    validation_result = self.validator.validate_explanation(
                        action_analysis_results,
                        action_analysis_results.get('timestamp')
                    )
                    print(f"\n🔍 VALIDATION RESULT: Valid={validation_result['is_valid']}, "
                          f"Confidence={validation_result['confidence']:.2f}")
                    if validation_result['issues']:
                        print("ISSUES FOUND:")
                        for issue in validation_result['issues']:
                            print(f"  - {issue}")
                    print()

                    # Log validation for monitoring
                    self.validation_logger.log_validation(
                        query=original_query,
                        control_action=primary_node,
                        validation_result=validation_result
                    )

                if 'error' in action_analysis_results:
                    final_answer = f"Error analyzing control action: {action_analysis_results['error']}"
                else:
                    # Include validation context in LLM prompt
                    validation_context = ""
                    if 'validation_result' in locals() and not validation_result['is_valid']:
                        validation_context = f"\n\nVALIDATION ISSUES DETECTED:\n"
                        validation_context += f"Confidence: {validation_result['confidence']:.2f}\n"
                        validation_context += "Issues:\n"
                        for issue in validation_result['issues']:
                            validation_context += f"- {issue}\n"
                        validation_context += "\nRecommendations:\n"
                        for rec in validation_result['recommendations']:
                            validation_context += f"- {rec}\n"
                        validation_context += "\nPlease acknowledge these validation issues in your response and consider alternative explanations if the primary analysis seems physically implausible."

                    # The standard formatter works for both single-event and range-based results
                    llm_context_str = self.format_control_action_context_for_llm(action_analysis_results)
                    llm_context_str += validation_context  # Add validation context

                    prompt_intent = 'explain_control_action'
                    # But if a specific, high-level strategy was found, use a specialized prompt
                    if action_analysis_results.get('causal_driver_type') == 'Coordinated Action (Free Heating Maneuver)':
                        prompt_intent = 'explain_kickstart_maneuver'
                    # <<< END OF NEW LOGIC >>>

                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        prompt_intent,
                        resolved_info,
                        hca_focus_node=primary_node
                    )

                    # Update validation log with final explanation
                    if 'validation_result' in locals():
                        self.validation_logger.log_validation(
                            query=original_query,
                            control_action=primary_node,
                            validation_result=validation_result,
                            final_explanation=final_answer
                        )
                    # Generate plot for control action analysis
                    if primary_node and resolved_date_obj:
                        # Check if this is a multi-variable control action question
                        target_nodes = resolved_info.get('target_nodes', [])
                        control_variables = [node for node in [primary_node] + target_nodes if node in self.control_action_nodes]
                        control_variables = list(dict.fromkeys(control_variables))  # Remove duplicates while preserving order
                        
                        if len(control_variables) > 1:
                            # Multi-variable subplot plot for control actions
                            import plotly.subplots as sp
                            fig = sp.make_subplots(
                                rows=len(control_variables),
                                cols=1,
                                shared_xaxes=True,
                                subplot_titles=[self.node_to_name.get(var, var) for var in control_variables]
                            )
                            
                            var_data = self.data[self.data.index.date == resolved_date_obj]
                            if not var_data.empty:
                                for i, var in enumerate(control_variables):
                                    column_name = self.node_to_column.get(var)
                                    if column_name and column_name in var_data.columns:
                                        fig.add_trace(
                                            go.Scatter(
                                                x=var_data.index.tolist(),
                                                y=var_data[column_name],
                                                mode='lines',
                                                name=self.node_to_name.get(var, var),
                                                showlegend=False
                                            ),
                                            row=i+1, col=1
                                        )
                            
                            fig.update_layout(
                                height=300 * len(control_variables),
                                title_text=f"Control Actions Analysis on {resolved_date_obj.strftime('%Y-%m-%d')}",
                                showlegend=False
                            )
                            
                            plot_figure = fig
                            var_names = [self.node_to_name.get(var, var) for var in control_variables]
                            final_answer += f"\n\n📊 Here's a multi-panel plot showing {', '.join(var_names)}:"
                        else:
                            # Single variable plot
                            plot_fig = self.visualize_data(primary_node, resolved_date_obj)
                            if isinstance(plot_fig, go.Figure):
                                plot_figure = plot_fig
                                final_answer += f"\n\nHere's the plot for {query_term or primary_node} on {resolved_date_obj.strftime('%Y-%m-%d')}:"
                    
                    # Update dialogue state before returning
                    final_date_for_state = resolved_info.get('date')
                    final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time')
                    if final_date_for_state and final_time_for_state:
                        final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
                        self.update_dialogue_state(
                            original_query=original_query,
                            resolved_intent=resolved_intent,
                            resolved_primary_node=primary_node,
                            resolved_variable_code=variable_code,
                            resolved_query_term=query_term,
                            resolved_timestamp=final_timestamp_for_state,
                            executed_intent=intent_executed,
                            pattern_type=resolved_info.get('pattern_type'),
                            plot_generated=bool(plot_figure)
                        )
                        print(f"DEBUG - Dialogue state successfully updated for explain_control_action. New last_timestamp: {final_timestamp_for_state}")
                    
                    return final_answer, plot_figure
            else:
                final_answer = "To explain a control action, I need the specific action and a time or time range."

            

        # --- Check for Specific Event Query (Highest Priority) ---
        analyze_specific_event = False
        event_to_analyze = None # Store the matched event details from state

        # Check if the query text or resolved intent suggests asking about an event explanation
        is_explanation_query = resolved_intent in ['explain', 'why'] or \
                               any(word in query_lower for word in ['why', 'explain', 'because']) or \
                               any(pt in query_lower for pt in ['sudden change', 'sudden increase', 'sudden decrease'])

        if is_explanation_query and primary_node and resolved_time_obj and resolved_date_obj:
            # Check if this time and variable match any recently detected sudden changes
            last_events = self.dialogue_state.get('last_detected_events', [])
            # Format the resolved time object into HH:MM string for comparison with stored events
            resolved_time_str = resolved_time_obj.strftime('%H:%M')
            resolved_date_date = resolved_date_obj # Keep as date object for comparison

            # Iterate through stored events from most recent to oldest
            for event in reversed(last_events): # Check in reverse order for recency preference
                # Check if variable, timestamp string, and date match
                if event.get('variable') == primary_node and \
                        event.get('timestamp_str') == resolved_time_str and \
                        event.get('date') == resolved_date_date:
                    # Found a match for a recently reported event
                    analyze_specific_event = True
                    event_to_analyze = event
                    print(f"DEBUG - Matched query time {resolved_time_str} with detected event at {event.get('timestamp_str')} for {event.get('variable')} on {event.get('date')}. Event details from state: {event}")
                    break # Found the most recent matching event, stop searching

        if analyze_specific_event:
            # User is asking about a previously reported specific event
            intent_executed = 'explain_specific_event' # Custom internal intent type
            print(f"Executing Specific Event Explanation for {event_to_analyze.get('variable')} at {event_to_analyze.get('timestamp_str')} on {event_to_analyze.get('date')}")

            # Use the variable code and query term derived from the resolved info,
            # falling back to event info if necessary.
            event_variable = event_to_analyze.get('variable') # Get node name from state
            event_variable_code = self.node_to_column.get(event_variable) or variable_code # Get code, fallback
            event_query_term = query_term or event_variable # Get term, fallback


            if not event_variable_code or not event_query_term or not event_variable:
                final_answer = f"Error: Could not map detected event node '{event_variable}' back to variable code/term for analysis."
            else:
                # Call enhance_event_analysis directly for the matched event's exact time and date
                # Assumes enhance_event_analysis returns the explanation string or structure (now a Dict)
                event_analysis_details = self.enhance_event_analysis(
                    event_variable, # Pass the variable node name
                    event_to_analyze['timestamp_str'], # Pass the exact HH:MM string from the detected event
                    event_to_analyze['date'] # Pass the date object from the detected event
                )

                # A dict result without an 'error' key is treated as success and is formatted
                # for the LLM with the dedicated specific-event formatter.
                if isinstance(event_analysis_details, dict) and 'error' not in event_analysis_details:
                    # Include basic info from the stored event into the analysis details for formatting
                    event_analysis_details['basic_change_info'] = event_to_analyze # Add the stored basic info

                    # Format the detailed analysis for the LLM
                    llm_context_str = self.format_specific_event_context_for_llm(event_analysis_details)

                    # Send the formatted context to the LLM
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        resolved_intent,
                        resolved_info,
                        attach_hca=False
                    ) # Pass original query

                elif isinstance(event_analysis_details, dict) and 'error' in event_analysis_details:
                    final_answer = f"Error during specific event analysis: {event_analysis_details['error']}"
                else:
                    final_answer = f"Specific event analysis returned unexpected format: {str(event_analysis_details)}"
        
        elif resolved_intent == 'explain_system_event': 
            intent_executed = 'explain_system_event'
            event_timestamp = None
            resolved_date_obj = resolved_info.get('date')
            if resolved_date_obj and resolved_info.get('time'):
                event_timestamp = datetime.combine(resolved_date_obj, resolved_info.get('time'))
            
            if event_timestamp:
                target_nodes = resolved_info.get('target_nodes', [])
                analysis = self.analyze_system_event(event_timestamp, target_nodes) # <<< IT CALLS YOUR FUNCTION HERE

                llm_context_str = self.format_system_event_for_llm(analysis)
                final_answer = self.respond_with_llm(
                    llm_context_str,
                    original_query,
                    'explain_system_event',
                    resolved_info,
                    hca_focus_timestamp=event_timestamp
                )
            else:
                final_answer = "To explain a system-wide event, I need a specific date and time."

        elif resolved_intent == 'time_range':
            # Query asking about a value OVER a range of time
            start_time_obj = resolved_info.get('start_time')
            end_time_obj = resolved_info.get('end_time')
            
            if primary_node and resolved_date_obj and start_time_obj and end_time_obj:
                intent_executed = 'time_range'
                print(f"Executing Time Range Query for Node='{primary_node}' ({query_term}) from {start_time_obj} to {end_time_obj} on {resolved_date_obj}")
                
                date_str = resolved_date_obj.strftime('%Y-%m-%d')
                start_str = start_time_obj.strftime('%H:%M')
                end_str = end_time_obj.strftime('%H:%M')

                if not variable_code or not query_term:
                    final_answer = f"Error: Could not map node '{primary_node}' back to variable code/term for analysis."
                else:
                    context = self.analyze_context_over_time_range(
                        variable_code, query_term, date_str, start_str, end_str
                    )

                    if 'error' in context:
                        final_answer = f"Error during time range analysis: {context['error']}"
                    else:
                        # Format the range-analysis context with the dedicated range formatter
                        # before sending it to the LLM.
                        llm_context_str = self.format_range_context_for_llm(context)
                        final_answer = self.respond_with_llm(
                            llm_context_str,
                            original_query,
                            resolved_intent,
                            resolved_info,
                            hca_focus_node=primary_node
                        )
                        # Generate plot for the time range analysis
                        if primary_node and resolved_date_obj:
                            range_plot = self.visualize_data(primary_node, resolved_date_obj)
                            if isinstance(range_plot, go.Figure):
                                plot_figure = range_plot
                                final_answer += f"\n\nHere's the plot for {query_term or primary_node} over the specified time range:"
            else:
                missing = []
                if not primary_node: missing.append("the variable")
                if not resolved_date_obj: missing.append("a specific date")
                if not start_time_obj or not end_time_obj: missing.append("a start and end time")
                final_answer = f"For a time range query, I need {', '.join(missing)}."

                
        elif resolved_intent == 'explain_disturbance_significance':
            intent_executed = 'explain_disturbance_significance'
            print(f"Executing Disturbance Significance Query for Node='{primary_node}' at {resolved_date_obj} {resolved_time_obj}")

            if not all([primary_node, resolved_date_obj, resolved_time_obj]):
                 final_answer = "To analyze the significance of a factor, I need the variable, a specific date, and time."
            else:
                date_str = resolved_date_obj.strftime('%Y-%m-%d')
                time_str = resolved_time_obj.strftime('%H:%M')

                if not variable_code or not query_term:
                    final_answer = f"Error: Could not map node '{primary_node}' back to variable code/term for analysis."
                else:
                    context = self.analyze_context_at_timestamp(variable_code, query_term, date_str, time_str)
                    if 'error' in context:
                        final_answer = f"Error during disturbance significance analysis: {context['error']}"
                    else:
                        llm_context_str = self.format_context_for_llm_query(context)
                        print("\n" + "="*25 + " FINAL CONTEXT FOR DISTURBANCE LLM " + "="*25)
                        print(llm_context_str)
                        print("="*78 + "\n")
                        final_answer = self.respond_with_llm(
                            llm_context_str,
                            original_query,
                            resolved_intent,
                            resolved_info,
                            attach_hca=False
                        )

        elif resolved_intent == 'timestamp':
            # Query asking for the *value* at a specific time
            if primary_node and resolved_date_obj and resolved_time_obj:
                intent_executed = 'timestamp'
                print(f"Executing Timestamp Query for Node='{primary_node}' ({query_term}) at {resolved_date_obj} {resolved_time_obj}")
                date_str = resolved_date_obj.strftime('%Y-%m-%d')
                time_str = resolved_time_obj.strftime('%H:%M')

                # Use variable_code and query_term derived from primary_node for analysis function
                if not variable_code or not query_term:
                    final_answer = f"Error: Could not map node '{primary_node}' back to variable code/term for analysis."
                else:
                    context = self.analyze_context_at_timestamp(variable_code, query_term, date_str, time_str)
                    # context_for_state_update = context # Store for potential state update (optional)

                    if 'error' in context:
                        final_answer = f"Error during timestamp analysis: {context['error']}"
                    else:
                        llm_context_str = self.format_context_for_llm_query(context)
                        print("\n" + "="*25 + " FINAL CONTEXT FOR TIMEstamp LLM " + "="*25)
                        print(llm_context_str)
                        print("="*73 + "\n")
                        final_answer = self.respond_with_llm(
                            llm_context_str,
                            original_query,
                            resolved_intent,
                            resolved_info,
                            attach_hca=False
                        ) # Send context and original query to LLM
                        # Generate plot for base state variables and control variables
                        if variable_code in self.base_state_variables or variable_code in self.control_action_nodes:
                            plot_fig = self.visualize_data(primary_node, resolved_date_obj)
                            if isinstance(plot_fig, go.Figure):
                                plot_figure = plot_fig
                                final_answer += f"\n\nHere's the plot for {query_term} on {resolved_date_obj.strftime('%Y-%m-%d')}:"
            else:
                missing = []
                if not primary_node: missing.append("the variable/node")
                if not resolved_date_obj or not resolved_time_obj: missing.append("a specific date and time")
                final_answer = f"For a specific value query, I need {', '.join(missing)}."

        elif resolved_intent == 'correlation':
            intent_executed = 'correlation'
            target_nodes = resolved_info.get('target_nodes', [])
            
            # Get unique variables and intelligently select the pair to analyze
            unique_nodes = list(set(target_nodes))  # Remove duplicates
            
            # Use resolved date or fall back to dialogue state date
            correlation_date = resolved_date_obj
            if not correlation_date and hasattr(self, 'dialogue_state') and self.dialogue_state.get('last_date'):
                # Convert string date to datetime.date object
                
                correlation_date = datetime.strptime(self.dialogue_state['last_date'], '%Y-%m-%d').date()
                print(f"DEBUG - Using date from dialogue state for correlation: {correlation_date}")
            
            if len(unique_nodes) >= 2 and correlation_date:
                # If we have multiple unique variables, prioritize:
                # 1. Primary node (if available) vs the most different type of variable
                # 2. Or just take the first two unique variables
                
                if primary_node and primary_node in unique_nodes and len(unique_nodes) > 1:
                    # Use primary node and find the most relevant other variable
                    other_nodes = [node for node in unique_nodes if node != primary_node]
                    # Prefer state variables over control variables for relationships
                    state_vars = [node for node in other_nodes if node in self.base_state_variables]
                    if state_vars:
                        var1, var2 = primary_node, state_vars[0]
                    else:
                        var1, var2 = primary_node, other_nodes[0]
                else:
                    # Just take first two unique variables
                    var1, var2 = unique_nodes[0], unique_nodes[1]
                
                print(f"Executing Two-Variable Relationship Query for Nodes='{var1}' vs '{var2}' (from target_nodes: {target_nodes})")
                
                # Call the new, intelligent analyzer
                analysis_result = self.analyze_relationship(var1, var2, correlation_date, resolved_time_obj)

                if 'error' in analysis_result:
                    final_answer = f"Error during analysis: {analysis_result['error']}"
                else:
                    # Format the rich context returned by the expert analyzer
                    evidence = analysis_result.get('evidence', {})
                    evidence_str = "\n".join([f"- **{key}:** {value}" for key, value in evidence.items()])
                    
                    llm_context_str = (
                        f"--- Relationship Analysis: {analysis_result['var1_name']} vs. {analysis_result['var2_name']} ---\n\n"
                        f"**Conclusion:** {analysis_result.get('conclusion', 'Not available.')}\n\n"
                        f"**Supporting Evidence:**\n{evidence_str}"
                    )
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'correlation',
                        resolved_info,
                        hca_focus_node=var1
                    )
                    # Generate merged plot for comparison
                    plot_fig = self.visualize_data(var1, correlation_date, [var2])
                    if isinstance(plot_fig, go.Figure):
                        plot_figure = plot_fig
                        final_answer += "\n\nHere's a comparison plot showing the relationship between these variables:"
            
            else:
                # This is the fallback case if something went wrong (e.g., date was missing)
                missing = []
                if len(target_nodes) < 2: missing.append("two variables to compare")
                if not resolved_date_obj: missing.append("a specific date")
                final_answer = f"To analyze a relationship, please provide { ' and '.join(missing) }."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time')
            if final_date_for_state or final_time_for_state:
                final_timestamp_for_state = None
                if final_date_for_state and final_time_for_state:
                    from datetime import datetime as dt_class
                    final_timestamp_for_state = dt_class.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type'),
                    resolved_date=final_date_for_state,
                    resolved_time=final_time_for_state
                )
            
            return final_answer, plot_figure
                

            
        elif resolved_intent == 'explain_strategy':
            intent_executed = 'explain_strategy'
            start_time_obj = resolved_info.get('start_time')
            end_time_obj = resolved_info.get('end_time')
            
            # Check if this is a time-range-specific strategy query
            if resolved_date_obj and start_time_obj and end_time_obj:
                print(f"DEBUG - Time-range strategy query detected: {start_time_obj} to {end_time_obj}")
                # Convert to datetime objects for the range analysis
                start_dt = datetime.combine(resolved_date_obj, start_time_obj)
                end_dt = datetime.combine(resolved_date_obj, end_time_obj)
                
                # Get the primary control node from the query context
                target_nodes = resolved_info.get('target_nodes', [])
                primary_node = resolved_info.get('primary_node')
                
                # For CO2 strategies, we analyze ventilation effectiveness
                if primary_node == 'C':
                    control_node = 'uV'  # Ventilation is the primary CO2 purge mechanism
                elif primary_node == 'T':
                    control_node = 'uQh'  # Heating for temperature
                elif primary_node == 'H':
                    control_node = 'uV'  # Ventilation for humidity control
                else:
                    control_node = primary_node if primary_node in self.control_action_nodes else 'uV'
                
                print(f"DEBUG - Analyzing control strategy: {control_node} for {primary_node} over time window")
                
                # Call the time-range control action analysis
                analysis_result = self._analyze_control_action_over_range(
                    control_node, start_dt, end_dt, [primary_node]
                )
                
                if 'error' in analysis_result:
                    final_answer = analysis_result['error']
                else:
                    # Format and send to LLM
                    llm_context_str = self.format_control_action_context_for_llm(analysis_result)
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'explain_control_action',
                        resolved_info,
                        hca_focus_node=control_node,
                        hca_focus_timestamp=start_dt + (end_dt - start_dt) / 2
                    )
                    
                    # Generate plot showing the control action and affected state
                    if primary_node:
                        plot_fig = self.visualize_data(primary_node, resolved_date_obj, [control_node])
                        if isinstance(plot_fig, go.Figure):
                            plot_figure = plot_fig
                            final_answer += f"\n\n📊 Here's a plot showing {self.node_to_name.get(primary_node, primary_node)}:"
            
            elif resolved_date_obj:
                # Full-day strategy analysis
                analysis_result = self.analyze_day_strategy(
                    resolved_date_obj,
                    resolved_info.get('target_nodes', []) 
                )

                if 'error' in analysis_result:
                    final_answer = analysis_result['error']
                else:
                    # 3. Use the new, flexible formatter and prompt.
                    llm_context_str = self.format_day_strategy_for_llm(analysis_result,original_query)
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'summarize_day_strategy',
                        resolved_info
                    )
            else:
                final_answer = "To evaluate the day's strategy, I need a specific date."

        elif resolved_intent == 'pattern':
            intent_executed = 'pattern'

            # --- START OF CORRECTED AND REORDERED LOGIC ---

            # Get all necessary info from the parse result
            primary_node = resolved_info.get('primary_node')
            resolved_date_obj = resolved_info.get('date')
            resolved_time_obj = resolved_info.get('time')
            start_time_obj = resolved_info.get('start_time')
            end_time_obj = resolved_info.get('end_time')
            pattern_type = resolved_info.get('pattern_type')

            is_causal_query = any(kw in query.lower() for kw in ['why', 'reason', 'cause', 'explain', 'peak', 'peaked', 'drop', 'spike'])
            is_single_time_trend_query = resolved_time_obj and not start_time_obj

            # <-- REORDERED: Check for the single-point causal query FIRST -->
            if is_causal_query and is_single_time_trend_query:
                print("DEBUG - Intent Upgraded: from 'pattern' to 'causal_trend' analysis.")
                intent_executed = 'explain_causal_trend'

                if not all([primary_node, resolved_date_obj, resolved_time_obj]):
                     final_answer = "To explain a causal event, I need the variable, date, and a specific time."
                else:
                    timestamp = datetime.combine(resolved_date_obj, resolved_time_obj)
                    analysis_result = self.analyze_causal_trend(primary_node, timestamp)
                    
                    if 'error' in analysis_result:
                        final_answer = analysis_result['error']
                    else:
                        llm_context = self.format_causal_trend_for_llm(analysis_result)
                        final_answer = self.respond_with_llm(
                            llm_context,
                            original_query,
                            'explain_causal_trend',
                            resolved_info,
                            hca_focus_node=primary_node,
                            hca_focus_timestamp=timestamp
                        )

            # <-- Now, handle the general range-based pattern queries as a fallback -->
            elif start_time_obj and end_time_obj:
                if not all([primary_node, resolved_date_obj]):
                    final_answer = "To analyze a pattern, I need a variable and a complete time range."
                elif pattern_type == 'gradual_trend':
                    print("Executing Gradual Trend Analysis...")
                    intent_executed = 'explain_gradual_trend'
                    analysis_result = self.analyze_gradual_trend(primary_node, resolved_date_obj, start_time_obj, end_time_obj)
                    llm_context = self.format_gradual_trend_for_llm(analysis_result)
                    trend_midpoint = datetime.combine(resolved_date_obj, start_time_obj) + (datetime.combine(resolved_date_obj, end_time_obj) - datetime.combine(resolved_date_obj, start_time_obj)) / 2
                    final_answer = self.respond_with_llm(
                        llm_context,
                        original_query,
                        'explain_gradual_trend',
                        resolved_info,
                        hca_focus_node=primary_node,
                        hca_focus_timestamp=trend_midpoint
                    )
                
                elif pattern_type == 'sudden_event':
                    print("Executing Sudden Event Analysis...")
                    intent_executed = 'explain_sudden_event'
                    analysis_result = self.explain_pattern(
                        variable=primary_node,
                        start_time=start_time_obj.strftime('%H:%M'),
                        end_time=end_time_obj.strftime('%H:%M'),
                        date=resolved_date_obj
                    )
                    llm_context = self.format_sudden_event_for_llm(analysis_result)
                    sudden_midpoint = datetime.combine(resolved_date_obj, start_time_obj) + (datetime.combine(resolved_date_obj, end_time_obj) - datetime.combine(resolved_date_obj, start_time_obj)) / 2
                    final_answer = self.respond_with_llm(
                        llm_context,
                        original_query,
                        'explain_sudden_event',
                        resolved_info,
                        hca_focus_node=primary_node,
                        hca_focus_timestamp=sudden_midpoint
                    )

                else: # General pattern query over a range
                    print(f"Executing General Pattern Query for Node='{primary_node}' on date {resolved_date_obj}")
                    start_str = start_time_obj.strftime('%H:%M')
                    end_str = end_time_obj.strftime('%H:%M')
                    pattern_analysis_results = self.explain_pattern(
                        variable=primary_node, start_time=start_str, end_time=end_str,
                        date=resolved_date_obj, pattern_type=pattern_type
                    )
                    
                    if isinstance(pattern_analysis_results, dict) and "error" in pattern_analysis_results:
                        final_answer = pattern_analysis_results["error"]
                    elif isinstance(pattern_analysis_results, dict):
                        llm_context_str = self.format_pattern_context_for_llm(pattern_analysis_results)
                        final_answer = self.respond_with_llm(
                            llm_context_str,
                            original_query,
                            'pattern',
                            resolved_info,
                            hca_focus_node=primary_node
                        )
                    else:
                        final_answer = "Pattern analysis returned an unexpected format."

            # <-- Final fallback if no conditions are met -->
            else:
                final_answer = "To analyze a pattern, I need either a specific time for a causal event (e.g., 'at 17:30') or a time range (e.g., 'from 10:00 to 12:00')."

        elif resolved_intent == 'evaluate_setpoint_strategy':
            intent_executed = 'evaluate_setpoint_strategy'
            setpoint_node = resolved_info.get('primary_node')
            
            if setpoint_node and resolved_date_obj and resolved_time_obj:
                timestamp = datetime.combine(resolved_date_obj, resolved_time_obj)
                analysis_result = self.analyze_setpoint_strategy(setpoint_node, timestamp)
                
                if 'error' in analysis_result:
                    final_answer = f"Error during setpoint strategy evaluation: {analysis_result['error']}"
                else:
                    llm_context_str = self.format_setpoint_strategy_for_llm(analysis_result)
                    focus_node = setpoint_node.replace('_ref', '')
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'evaluate_setpoint_strategy',
                        resolved_info,
                        hca_focus_node=focus_node,
                        hca_focus_timestamp=timestamp
                    )
                    
                    # Generate plot for the setpoint analysis
                    if resolved_date_obj:
                        plot_fig = self.visualize_data(setpoint_node.replace('_ref', ''), resolved_date_obj)
                        if isinstance(plot_fig, go.Figure):
                            plot_figure = plot_fig
                            final_answer += f"\n\nHere's the plot for {self.node_to_name.get(setpoint_node.replace('_ref', ''), setpoint_node)} on {resolved_date_obj.strftime('%Y-%m-%d')}:"
            else:
                final_answer = "To evaluate a setpoint strategy, I need the setpoint variable, a specific date, and time."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time')
            if final_date_for_state or final_time_for_state:
                final_timestamp_for_state = None
                if final_date_for_state and final_time_for_state:
                    from datetime import datetime as dt_class
                    final_timestamp_for_state = dt_class.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type'),
                    resolved_date=final_date_for_state,
                    resolved_time=final_time_for_state
                )
            
            return final_answer, plot_figure
        
        elif resolved_intent == 'analyze_control_saturation':
            intent_executed = 'analyze_control_saturation'
            # For control saturation, we need the Lagrangian node, not the control node
            lagrangian_nodes = [node for node in resolved_info.get('target_nodes', []) if node.endswith('_ieq')]
            control_node = resolved_info.get('control_node') or resolved_info.get('primary_node')
            resolved_date_obj = resolved_info.get('date')

            if lagrangian_nodes and resolved_date_obj:
                # Use the first lagrangian node found
                lagrangian_node = lagrangian_nodes[0]
                analysis_result = self.analyze_control_saturation(lagrangian_node, resolved_date_obj)
                
                if 'error' in analysis_result:
                    final_answer = analysis_result['error']
                else:
                    # Create a simple formatter
                    evidence = analysis_result.get('evidence', {})
                    evidence_str = "\n".join([f"- **{key}:** {value}" for key, value in evidence.items()])
                    llm_context_str = (
                        f"--- Analysis of Control System Saturation ---\n\n"
                        f"**Conclusion:** {analysis_result.get('conclusion', 'Not available.')}\n\n"
                        f"**Supporting Evidence:**\n{evidence_str}"
                    )

                    # You can create a new dedicated prompt or reuse a generic one
                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'explain_control_action',
                        resolved_info,
                        hca_focus_node=control_node or lagrangian_node.replace('_ieq', '')
                    ) # Reusing is fine
            elif control_node and resolved_date_obj:
                # Fallback: if no lagrangian found but we have a control node, try to infer the lagrangian
                lagrangian_node = control_node + '_ieq'
                if lagrangian_node in self.node_to_column:
                    analysis_result = self.analyze_control_saturation(lagrangian_node, resolved_date_obj)
                    
                    if 'error' in analysis_result:
                        final_answer = analysis_result['error']
                    else:
                        evidence = analysis_result.get('evidence', {})
                        evidence_str = "\n".join([f"- **{key}:** {value}" for key, value in evidence.items()])
                        llm_context_str = (
                            f"--- Analysis of Control System Saturation ---\n\n"
                            f"**Conclusion:** {analysis_result.get('conclusion', 'Not available.')}\n\n"
                            f"**Supporting Evidence:**\n{evidence_str}"
                        )
                        final_answer = self.respond_with_llm(
                            llm_context_str,
                            original_query,
                            'explain_control_action',
                            resolved_info,
                            hca_focus_node=control_node
                        )
                else:
                    final_answer = f"Could not find Lagrangian node for control {control_node}."
            else:
                final_answer = "To analyze a control saturation event, I need the specific Lagrangian and a date."
            final_date_for_state = resolved_info.get('date')
            final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time')
            if final_date_for_state or final_time_for_state:
                final_timestamp_for_state = None
                if final_date_for_state and final_time_for_state:
                    from datetime import datetime as dt_class
                    final_timestamp_for_state = dt_class.combine(final_date_for_state, final_time_for_state)
                self.update_dialogue_state(
                    original_query=original_query,
                    resolved_intent=resolved_intent,
                    resolved_primary_node=primary_node,
                    resolved_variable_code=variable_code,
                    resolved_query_term=query_term,
                    resolved_timestamp=final_timestamp_for_state,
                    executed_intent=intent_executed,
                    pattern_type=resolved_info.get('pattern_type'),
                    resolved_date=final_date_for_state,
                    resolved_time=final_time_for_state
                )
            
            return final_answer, plot_figure        
        

        elif resolved_intent == 'explain_causal_anomaly':
            if primary_node and resolved_date_obj and resolved_time_obj:
                intent_executed = 'explain_causal_anomaly'
                print(f"Executing Causal Anomaly Analysis for Node='{primary_node}' at {resolved_date_obj} {resolved_time_obj}")
                timestamp = datetime.combine(resolved_date_obj, resolved_time_obj)
                analysis_result = self.analyze_causal_anomaly(primary_node, timestamp)
                
                if 'error' in analysis_result:
                    final_answer = f"Error during causal anomaly analysis: {analysis_result['error']}"
                else:
                    # Format the analysis result for LLM
                    conclusion = analysis_result.get('conclusion', 'Not available.')
                    evidence = analysis_result.get('evidence', {})
                    evidence_str = "\n".join([f"- **{key}:** {value}" for key, value in evidence.items()])
                    
                    llm_context_str = (
                        f"--- Causal Anomaly Analysis ---\n\n"
                        f"**Conclusion:** {conclusion}\n\n"
                        f"**Supporting Evidence:**\n{evidence_str}"
                    )

                    final_answer = self.respond_with_llm(
                        llm_context_str,
                        original_query,
                        'explain_causal_anomaly',
                        resolved_info,
                        hca_focus_node=primary_node,
                        hca_focus_timestamp=timestamp
                    )
                    
                    # Generate plot for the variable
                    plot_fig = self.visualize_data(primary_node, resolved_date_obj)
                    if isinstance(plot_fig, go.Figure):
                        plot_figure = plot_fig
                        final_answer += f"\n\n📊 Here's a plot showing {self.node_to_name.get(primary_node, primary_node)} on {resolved_date_obj.strftime('%Y-%m-%d')}:"
            else:
                final_answer = "To explain a causal anomaly, I need to know the specific variable and timestamp."

        # --- 5. Update Dialogue State ---
        final_date_for_state = resolved_info.get('date')
        # Use the primary time for context (start_time for ranges, or single 'time')
        final_time_for_state = resolved_info.get('start_time') or resolved_info.get('time')
        
        # Update dialogue state with available information
        if final_date_for_state or final_time_for_state:
            final_timestamp_for_state = None
            if final_date_for_state and final_time_for_state:
                final_timestamp_for_state = datetime.combine(final_date_for_state, final_time_for_state)
            
            self.update_dialogue_state(
                original_query=original_query,
                resolved_intent=resolved_intent,
                resolved_primary_node=primary_node,
                resolved_variable_code=variable_code,
                resolved_query_term=query_term,
                resolved_timestamp=final_timestamp_for_state,
                executed_intent=intent_executed,
                pattern_type=resolved_info.get('pattern_type'),
                resolved_date=final_date_for_state,
                resolved_time=final_time_for_state
            )
            if final_timestamp_for_state:
                print(f"DEBUG - Dialogue state successfully updated. New last_timestamp: {final_timestamp_for_state}")
            else:
                print(f"DEBUG - Dialogue state partially updated. Date: {final_date_for_state}, Time: {final_time_for_state}")
        else:
            print("DEBUG - Could not update dialogue state, no date or time information available.")
        
        # --- FINAL PLOT GENERATION: Generate plots for ANY question that mentions variables ---
        # Generate multi-panel plots for multiple variables, single plots for single variables
        if not plot_figure and resolved_date_obj:
            target_nodes = resolved_info.get('target_nodes', [])
            primary_node = resolved_info.get('primary_node')

            # Collect all variables mentioned in the question, deduplicated by column
            all_variables = []
            seen_columns = set()
            
            # Add primary node first
            if primary_node and primary_node in self.node_to_column:
                col = self.node_to_column[primary_node]
                if col not in seen_columns:
                    seen_columns.add(col)
                    all_variables.append(primary_node)
            
            # Add target nodes, avoiding duplicates by column
            if target_nodes:
                for node in target_nodes:
                    if node in self.node_to_column:
                        col = self.node_to_column[node]
                        if col not in seen_columns:
                            seen_columns.add(col)
                            all_variables.append(node)

            # Generate plots if we have variables
            if all_variables:
                try:
                    if len(all_variables) == 1:
                        # Single variable plot
                        final_plot = self.visualize_data(all_variables[0], resolved_date_obj)
                        if isinstance(final_plot, go.Figure):
                            plot_figure = final_plot
                            # Add plot indicator to response if not already present
                            if "plot" not in final_answer.lower() and "graph" not in final_answer.lower():
                                var_name = resolved_info.get('query_term') or self.node_to_name.get(all_variables[0], all_variables[0])
                                final_answer += f"\n\n📊 Here's a plot showing {var_name}:"
                    else:
                        # Multi-variable subplot plot
                        import plotly.subplots as sp
                        fig = sp.make_subplots(
                            rows=len(all_variables),
                            cols=1,
                            shared_xaxes=True,
                            subplot_titles=[self.node_to_name.get(var, var) for var in all_variables]
                        )

                        for i, var in enumerate(all_variables):
                            var_data = self.data[self.data.index.date == resolved_date_obj]
                            if not var_data.empty:
                                column_name = self.node_to_column.get(var)
                                if column_name and column_name in var_data.columns:
                                    fig.add_trace(
                                        go.Scatter(
                                            x=var_data.index.tolist(),
                                            y=var_data[column_name],
                                            mode='lines',
                                            name=self.node_to_name.get(var, var),
                                            showlegend=False
                                        ),
                                        row=i+1, col=1
                                    )

                        fig.update_layout(
                            height=300 * len(all_variables),
                            title_text=f"Multi-Variable Analysis on {resolved_date_obj.strftime('%Y-%m-%d')}",
                            showlegend=False
                        )

                        plot_figure = fig
                        # Add plot indicator to response if not already present
                        if "plot" not in final_answer.lower() and "graph" not in final_answer.lower():
                            var_names = [self.node_to_name.get(var, var) for var in all_variables]
                            final_answer += f"\n\n📊 Here's a multi-panel plot showing {', '.join(var_names)}:"

                except Exception as e:
                    print(f"Error creating plot: {e}")
                    plot_figure = None
        
        # Final Answer Return - Handle cases where final_answer might still be None if intent wasn't executed/handled
        if final_answer is None:
            final_answer = "I'm sorry, I couldn't process your query. Please try rephrasing or providing more specific information."
        return final_answer, plot_figure

    def format_specific_event_context_for_llm(self, event_analysis_details: Dict[str, Any]) -> str:
        """
        Render the stored analysis of ONE specific event as plain text for the LLM.

        Args:
            event_analysis_details: Dict that may contain 'error',
                'basic_change_info', 'kg_neighborhood_nodes',
                'preceding_values_of_related' and 'kg_direct_relationships'.
                Missing keys fall back to placeholder text.

        Returns:
            A single error sentence when 'error' is present; otherwise a
            multi-section text block with surrounding whitespace stripped.
        """
        def _two_decimals(raw):
            # Plain ints/floats are shown with two decimals; anything else is stringified.
            if isinstance(raw, (int, float)):
                return f"{raw:.2f}"
            return str(raw)

        # Short-circuit: a failed analysis only yields a one-line error message.
        if 'error' in event_analysis_details:
            failed_info = event_analysis_details.get('basic_change_info', {})
            when = failed_info.get('timestamp_str', 'Unknown Time')
            what = failed_info.get('variable', 'Variable')
            return f"Error retrieving detailed analysis for event at {when} for {what}: {event_analysis_details['error']}"

        # Basic facts about the change (passed through from the stored event).
        change = event_analysis_details.get('basic_change_info', {})
        when = change.get('timestamp_str', 'Unknown Time')
        kind = change.get('change_type', 'change')
        node_name = change.get('variable', 'Variable')  # Node name e.g. 'T'
        before_txt = _two_decimals(change.get('value_before'))
        after_txt = _two_decimals(change.get('value_after'))
        delta_txt = _two_decimals(change.get('signed_change'))

        parts = [
            f"--- Analysis for Notable Event: Sudden {kind} in {node_name} at {when} ---\n",
            f"Value changed from {before_txt} to {after_txt} (change: {delta_txt}).\n\n",
        ]

        # Section: KG neighborhood nodes.
        neighbours = event_analysis_details.get('kg_neighborhood_nodes', [])
        if neighbours:
            parts.append(f"Related KG nodes ({node_name} neighborhood): {', '.join(neighbours)}\n")
        else:
            parts.append(f"No related KG nodes found for {node_name}.\n")
        parts.append("\n")

        # Section: values of related nodes in the preceding time step.
        earlier_values = event_analysis_details.get('preceding_values_of_related', {})
        if earlier_values:
            parts.append("Values of related variables in the preceding time step:\n")
            parts.extend(
                f"- {related}: {_two_decimals(reading)}\n"
                for related, reading in earlier_values.items()
            )
        else:
            parts.append("No preceding values for related variables available.\n")
        parts.append("\n")

        # Section: direct KG relationships, grouped by edge type.
        relationships = event_analysis_details.get('kg_direct_relationships', {})
        if relationships:
            parts.append("Direct KG relationships for this event's variable/node:\n")
            for edge_type, edge_list in relationships.items():
                readable_type = edge_type.replace('_', ' ')
                if not edge_list:
                    parts.append(f" No {readable_type} found.\n")
                    continue
                parts.append(f" {readable_type}:\n")
                for edge in edge_list:
                    relation = edge.get('relationship', '?')
                    if edge_type == 'incoming_edges':
                        line = f"  - {edge.get('source_node', '?')} --({relation})--> {node_name}"
                    else:  # outgoing_edges
                        line = f"  - {node_name} --({relation})--> {edge.get('target_node', '?')}"
                    description = edge.get('description', '')
                    if description:
                        line += f" (Desc: {description})"
                    parts.append(line + "\n")
        else:
            parts.append("No direct KG relationships found for this event's variable/node.\n")
        parts.append("\n")

        # NOTE: local correlations ('local_correlations' key) are not rendered here.

        return "".join(parts).strip()


    def _interpret_correlation_strength(self, abs_corr: float) -> str:
        """Helper method to interpret correlation strength."""
        if abs_corr > 0.8:
            return "Very Strong (|r| > 0.8)"
        elif abs_corr > 0.6:
            return "Strong (0.6 < |r| ≤ 0.8)"
        elif abs_corr > 0.3:
            return "Moderate (0.3 < |r| ≤ 0.6)"
        elif abs_corr > 0.1:
            return "Weak (0.1 < |r| ≤ 0.3)"
        elif abs_corr > 0.1:
            return "Weak (0.1 < |r| ≤ 0.3)"
        else:
            return "Very Weak or No Relationship (|r| ≤ 0.1)"

    # VALIDATION UTILITY METHODS
    def get_validation_statistics(self) -> Dict[str, Any]:
        """Return the statistics produced by ``self.validation_logger.get_validation_stats()``."""
        stats = self.validation_logger.get_validation_stats()
        return stats

    def get_validation_improvement_suggestions(self) -> List[str]:
        """Return the suggestions produced by ``self.validation_logger.suggest_improvements()``."""
        suggestions = self.validation_logger.suggest_improvements()
        return suggestions

    def validate_hca_result(self, hca_result: Dict[str, Any], timestamp: datetime) -> Dict[str, Any]:
        """
        Public entry point for validating an HCA result.

        Forwards both arguments unchanged to
        ``self.validator.validate_explanation`` and returns its result.
        """
        run_validation = self.validator.validate_explanation
        return run_validation(hca_result, timestamp)

    def predict_control_effect(self, control_action: str, state_variable: str, timestamp: datetime) -> Dict[str, Any]:
        """
        Predict the expected effect of a control action on a state variable.

        Asks ``self.validator`` for the data row at ``timestamp`` and, when one
        exists, returns the validator's ``predict_effect_direction`` result for it.
        When no row is returned, the result is a dict with a single 'error' key.
        """
        validator = self.validator
        row_at_time = validator._get_data_at_timestamp(timestamp)
        if row_at_time is not None:
            return validator.predict_effect_direction(control_action, state_variable, row_at_time)
        return {'error': 'No data available at specified timestamp'}


class PhysicalPlausibilityValidator:
    """
    Validates the physical plausibility of HCA analysis results before they reach the LLM.
    Checks direction consistency, environmental context, and constraint consistency.

    Every check reports problems as strings. The markers "CONTRADICTION",
    "WARNING" and "INCONSISTENCY" inside those strings are what
    validate_explanation uses to weight confidence and to decide whether the
    user should be queried.
    """

    # Pulls the two numbers out of "from <a> ... to <b>". The optional "**"
    # lets it read markdown such as "from **+1.28 units/hour** to **+2.29 units/hour**".
    _TREND_RATE_PATTERN = re.compile(
        r'from\s+(?:\*\*)?([+-]?[0-9.]+).*?to\s+(?:\*\*)?([+-]?[0-9.]+)',
        re.DOTALL,
    )

    # Keyword fallback used when a trend summary contains no "from ... to ..." pair.
    _UPWARD_KEYWORDS = ('increase', 'increased', 'more', 'higher', 'improved')
    _DOWNWARD_KEYWORDS = ('decrease', 'decreased', 'less', 'lower', 'reduce', 'reduced', 'worsened')

    # Indoor state node -> (indoor fallback column, outdoor node,
    #                       outdoor fallback column, label, unit suffix).
    # Shared by the ventilation direction rules and their explanations.
    _VENTILATION_PAIRS = {
        'H': ('relhum_ref', 'Hout', 'Hout', 'humidity', ''),
        'T': ('Temp_ref', 'Tout', 'Tout', 'temperature', '°C'),
        'C': ('CO2_ref', 'Cout', 'Cout', 'CO2', ' ppm'),
    }

    # Causal-driver phrase -> Lagrangian multiplier node expected to be active.
    _DRIVER_TO_LAGRANGIAN = {
        'temperature constraint': 'T_ieq',
        'humidity constraint': 'H_ieq',
        'co2 constraint': 'C_ieq',
        'biomass constraint': 'B_ieq',
        'ventilation constraint': 'uV_ieq',
        'heating constraint': 'uQh_ieq',
        'cooling constraint': 'uQc_ieq',
        'co2 injection constraint': 'uC_ieq'
    }

    # Explanations for rules whose direction does not depend on the data row.
    _FIXED_EFFECT_EXPLANATIONS = {
        ('uQh', 'T'): "Heating should increase temperature by adding thermal energy to the greenhouse.",
        ('uQh', 'H'): "Heating should decrease relative humidity by increasing air temperature (same absolute humidity).",
        ('uQc', 'T'): "Cooling should decrease temperature by removing thermal energy from the greenhouse.",
        ('uQc', 'H'): "Cooling should decrease humidity by condensing water vapor from the air.",
        ('uC', 'C'): "CO2 injection should increase CO2 concentration by adding CO2 gas to the greenhouse air.",
    }

    def __init__(self, kg_system):
        """
        Bind the validator to a knowledge-graph system.

        Args:
            kg_system: Object exposing ``data``, ``node_to_column`` and
                ``config`` attributes; they are stored by reference.

        Side effects:
            On first construction (when the 'PhysicalPlausibilityValidator'
            logger has no handlers yet) a console handler and a file handler
            writing to 'validation.log' are attached to that logger.
        """
        self.kg = kg_system
        self.data = kg_system.data
        self.node_to_column = kg_system.node_to_column
        self.config = kg_system.config

        # Configure logger with proper formatting
        self.logger = logging.getLogger('PhysicalPlausibilityValidator')
        if not self.logger.handlers:
            self.logger.setLevel(logging.INFO)

            # Create formatter
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                datefmt='%Y-%m-%d %H:%M:%S'
            )

            # Console handler
            console_handler = logging.StreamHandler()
            console_handler.setLevel(logging.INFO)
            console_handler.setFormatter(formatter)
            self.logger.addHandler(console_handler)

            # File handler for validation logs
            file_handler = logging.FileHandler('validation.log')
            file_handler.setLevel(logging.DEBUG)
            file_handler.setFormatter(formatter)
            self.logger.addHandler(file_handler)

        # Causality rules: control -> state effect direction
        # Format: (control_node, state_node): expected_direction_function
        self.causality_rules = {
            ('uV', 'H'): self._ventilation_humidity_direction,
            ('uV', 'T'): self._ventilation_temperature_direction,
            ('uV', 'C'): self._ventilation_co2_direction,
            ('uQh', 'T'): lambda data_row: 1,  # heating always increases temperature
            ('uQh', 'H'): lambda data_row: -1,  # heating always decreases humidity
            ('uQc', 'T'): lambda data_row: -1,  # cooling always decreases temperature
            ('uQc', 'H'): lambda data_row: -1,  # cooling always decreases humidity
            ('uC', 'C'): lambda data_row: 1,    # CO2 injection always increases CO2
        }

    def validate_explanation(self, hca_result: Dict[str, Any], timestamp: datetime) -> Dict[str, Any]:
        """
        Main validation method that checks physical plausibility of HCA results.

        Args:
            hca_result: HCA analysis result dict; the keys read here are
                'control_action_node', 'observed_effects' and 'causal_driver_type'.
            timestamp: Timestamp of the analysis

        Returns:
            Validation result dict with keys 'is_valid', 'confidence',
            'issues', 'recommendations', 'should_query_user' and
            'validation_details'. Any exception raised by a check is recorded
            as a "Validation error" issue instead of propagating.
        """
        validation_result = {
            'is_valid': True,
            'confidence': 1.0,
            'issues': [],
            'recommendations': [],
            'should_query_user': False,
            'validation_details': {}
        }

        try:
            # Get data row for the timestamp
            data_row = self._get_data_at_timestamp(timestamp)
            if data_row is None:
                validation_result['issues'].append("No data available at specified timestamp")
                validation_result['is_valid'] = False
                validation_result['confidence'] = 0.0
                return validation_result

            control_action = hca_result.get('control_action_node')
            observed_effects = hca_result.get('observed_effects', {})
            causal_driver_type = hca_result.get('causal_driver_type')

            # Check 1: Direction Consistency (needs a usable control action name)
            direction_checked = isinstance(control_action, str)
            if direction_checked:
                direction_issues = self._check_direction_consistency(control_action, observed_effects, data_row)
            else:
                direction_issues = ["WARNING: control_action is missing or not a string, cannot check direction consistency."]
            validation_result['issues'].extend(direction_issues)

            # Check 2: Environmental Context
            context_issues = self._check_environmental_context(control_action, causal_driver_type, data_row)
            validation_result['issues'].extend(context_issues)

            # Check 3: Constraint Consistency
            constraint_issues = self._check_constraint_consistency(control_action, causal_driver_type, timestamp)
            validation_result['issues'].extend(constraint_issues)

            # Calculate overall validity and confidence (severity-weighted)
            issues = validation_result['issues']
            validation_result['is_valid'] = len(issues) == 0

            # Severity-weighted confidence calculation
            contradiction_count = sum(1 for issue in issues if 'CONTRADICTION' in issue)
            warning_count = sum(1 for issue in issues if 'WARNING' in issue)
            inconsistency_count = sum(1 for issue in issues if 'INCONSISTENCY' in issue)

            # Weight reductions: contradictions (0.5), warnings (0.2), inconsistencies (0.1)
            confidence_reduction = (contradiction_count * 0.5) + (warning_count * 0.2) + (inconsistency_count * 0.1)
            validation_result['confidence'] = max(0.0, 1.0 - confidence_reduction)

            # Only hard contradictions justify asking the user
            validation_result['should_query_user'] = contradiction_count > 0

            # Add recommendations
            if not validation_result['is_valid']:
                validation_result['recommendations'] = self._generate_recommendations(issues, hca_result)

            validation_result['validation_details'] = {
                # Reflects whether the direction check actually ran.
                'direction_check_performed': direction_checked,
                'context_check_performed': True,
                'constraint_check_performed': True,
                'data_timestamp': timestamp.isoformat() if hasattr(timestamp, 'isoformat') else str(timestamp),
                'control_action': control_action
            }

        except Exception as e:
            validation_result['issues'].append(f"Validation error: {str(e)}")
            validation_result['is_valid'] = False
            validation_result['confidence'] = 0.0

        return validation_result

    def _get_data_at_timestamp(self, timestamp: datetime) -> Optional[pd.Series]:
        """
        Get data row closest to the given timestamp.

        Returns:
            The nearest row of ``self.data``, or None when the lookup fails
            or reports no match.
        """
        try:
            idx = self.data.index.get_indexer([timestamp], method='nearest')[0]
        except Exception as e:
            self.logger.warning(f"Could not locate data row for timestamp {timestamp}: {e}")
            return None

        # get_indexer marks "no match" with -1; iloc[-1] would silently
        # return the last row instead.
        if idx < 0:
            return None

        try:
            return self.data.iloc[idx]
        except Exception as e:
            self.logger.warning(f"Could not read data row for timestamp {timestamp}: {e}")
            return None

    def _check_direction_consistency(self, control_action: str, observed_effects: Dict, data_row: pd.Series) -> List[str]:
        """
        Check if observed state changes match expected physical directions.

        The per-variable effects are read from
        ``observed_effects['influenced_variables']`` when present, else from
        ``observed_effects['effects']``, else from ``observed_effects`` itself.
        Entries that are not dicts, or whose 'trend_summary' yields no
        direction, are skipped.

        Returns:
            A list of "CONTRADICTION: ..." strings, one per mismatching variable.
        """
        issues = []
        self.logger.debug(
            "_check_direction_consistency called: control_action=%s, observed_effects keys=%s",
            control_action,
            list(observed_effects.keys()) if observed_effects else 'None',
        )
        if not observed_effects:
            return issues

        if 'influenced_variables' in observed_effects:
            effects_dict = observed_effects['influenced_variables']
        else:
            effects_dict = observed_effects.get('effects', observed_effects)

        if not effects_dict or not isinstance(effects_dict, dict):
            return issues

        for state_node, effect_info in effects_dict.items():
            if not isinstance(effect_info, dict):
                continue

            # Extract direction from trend_summary text
            trend_summary = effect_info.get('trend_summary', '')
            observed_direction = self._extract_direction_from_trend_summary(trend_summary)
            if observed_direction is None:
                continue

            # Get expected direction (None when no rule applies or data is missing)
            expected_direction = self._get_expected_direction(control_action, state_node, data_row)
            if expected_direction is None or observed_direction == expected_direction:
                continue

            issues.append(
                f"CONTRADICTION: {control_action} → {state_node} direction mismatch. "
                f"Expected: {expected_direction:+d}, Observed: {observed_direction:+d}. "
                f"{self._explain_direction_mismatch(control_action, state_node, data_row)}"
            )

        return issues

    def _extract_direction_from_trend_summary(self, trend_summary: str) -> Optional[int]:
        """
        Extract direction from HCA trend summary (handles markdown).

        When the text contains "from <a> ... to <b>", the sign of <b> decides:
        +1 above 0.01, -1 below -0.01, otherwise 0. If no such pair is found
        (or <b> is not a valid number) a keyword search decides between +1
        and -1. Returns None when nothing matches or the summary is empty.
        """
        if not trend_summary:
            return None

        match = self._TREND_RATE_PATTERN.search(trend_summary)
        if match:
            try:
                final_value = float(match.group(2))
            except ValueError:
                # e.g. a lone "." matched the number class; fall back to keywords
                pass
            else:
                if final_value > 0.01:
                    return +1
                if final_value < -0.01:
                    return -1
                return 0

        # Fallback to keywords
        summary_lower = trend_summary.lower()
        if any(word in summary_lower for word in self._UPWARD_KEYWORDS):
            return +1
        if any(word in summary_lower for word in self._DOWNWARD_KEYWORDS):
            return -1

        return None

    def _get_expected_direction(self, control_action: str, state_node: str, data_row: pd.Series) -> Optional[int]:
        """
        Get expected direction of effect based on causality rules.

        Returns:
            +1 or -1 from the matching rule, or None when no rule exists for
            the pair or the rule could not evaluate the data row.
        """
        direction_func = self.causality_rules.get((control_action, state_node))
        if direction_func is None:
            return None
        return direction_func(data_row)

    def _ventilation_direction(self, state_node: str, data_row: pd.Series) -> Optional[int]:
        """
        Shared implementation of the ventilation direction rules.

        Compares the outdoor value with the indoor value of ``state_node``
        ('H', 'T' or 'C'): +1 when outdoor is greater, -1 otherwise.
        Returns None when a column is missing, a value is NaN, or the
        comparison fails.
        """
        in_default, out_node, out_default, label, _unit = self._VENTILATION_PAIRS[state_node]
        try:
            in_col = self.node_to_column.get(state_node, in_default)
            out_col = self.node_to_column.get(out_node, out_default)

            # Membership must be tested against .index: `in` on a Series
            # checks the index labels only by convention, so be explicit.
            if in_col not in data_row.index or out_col not in data_row.index:
                self.logger.warning(f"Missing {label} columns: in={in_col}, out={out_col}")
                return None

            inside = data_row[in_col]
            outside = data_row[out_col]

            # Safety check for NaN
            if pd.isna(inside) or pd.isna(outside):
                self.logger.warning(
                    f"NaN values in {label}: {state_node}_in={inside}, {state_node}_out={outside}"
                )
                return None

            return 1 if outside > inside else -1
        except KeyError as e:
            self.logger.error(f"KeyError in ventilation {label} direction: {e}")
            return None
        except Exception as e:
            self.logger.error(f"Unexpected error in ventilation {label} direction: {e}")
            return None

    def _ventilation_humidity_direction(self, data_row: pd.Series) -> Optional[int]:
        """Expected direction for ventilation -> humidity based on Hout vs H_in."""
        return self._ventilation_direction('H', data_row)

    def _ventilation_temperature_direction(self, data_row: pd.Series) -> Optional[int]:
        """Expected direction for ventilation -> temperature based on Tout vs T_in."""
        return self._ventilation_direction('T', data_row)

    def _ventilation_co2_direction(self, data_row: pd.Series) -> Optional[int]:
        """Expected direction for ventilation -> CO2 based on Cout vs C_in."""
        return self._ventilation_direction('C', data_row)

    def _explain_direction_mismatch(self, control_action: str, state_node: str, data_row: pd.Series) -> str:
        """
        Provide human-readable explanation for direction mismatches.

        Ventilation mismatches quote the outdoor and indoor readings
        ('N/A' when a column is absent); every other pair gets a generic hint.
        """
        if control_action != 'uV' or state_node not in self._VENTILATION_PAIRS:
            return "Check environmental conditions"

        in_default, out_node, out_default, label, _unit = self._VENTILATION_PAIRS[state_node]
        outside = data_row.get(self.node_to_column.get(out_node, out_default), 'N/A')
        inside = data_row.get(self.node_to_column.get(state_node, in_default), 'N/A')
        return f"Outside {label} ({outside}) vs indoor {label} ({inside})"

    def _check_environmental_context(self, control_action: str, causal_driver_type: str, data_row: pd.Series) -> List[str]:
        """
        Check if claimed causal driver makes sense given environmental conditions.

        Three situations are flagged as contradictions: cooling while the
        outside is more than 5 degrees colder than inside, heating while solar
        radiation exceeds the configured 'Qrad_high' threshold (default 500),
        and ventilation claimed for humidity while outside humidity is not
        lower than inside. A failure inside the check is reported as an
        "ERROR: ..." issue rather than raised.
        """
        issues = []
        # The driver description is optional in HCA results; a missing or
        # non-string value simply means "no humidity claim".
        driver_text = causal_driver_type.lower() if isinstance(causal_driver_type, str) else ''

        try:
            # Get current environmental conditions safely
            env_conditions = {}
            for node in ['Tout', 'Qrad']:
                col = self.node_to_column.get(node)
                if col and col in data_row.index:
                    env_conditions[node] = data_row[col]

            # Get indoor conditions for comparison
            indoor_conditions = {}
            for node in ['T', 'H']:
                col = self.node_to_column.get(node)
                if col and col in data_row.index:
                    indoor_conditions[node] = data_row[col]

            # Cooling should not be used when outside is colder than inside
            if control_action == 'uQc':
                if 'Tout' in env_conditions and 'T' in indoor_conditions:
                    t_out = env_conditions['Tout']
                    t_in = indoor_conditions['T']
                    if not (pd.isna(t_out) or pd.isna(t_in)) and t_out < t_in - 5:  # Outside significantly colder
                        issues.append(f"CONTRADICTION: Cooling activated when outside temperature ({t_out:.1f}°C) "
                                    f"is much colder than indoor ({t_in:.1f}°C). Cooling would be ineffective.")

            # Heating should not be used during very high solar radiation
            if control_action == 'uQh':
                if 'Qrad' in env_conditions:
                    q_rad = env_conditions['Qrad']
                    solar_threshold = self.config.get('reasoning_thresholds', {}).get('Qrad_high', 500)
                    if not pd.isna(q_rad) and q_rad > solar_threshold:
                        issues.append(f"CONTRADICTION: Heating activated during very high solar radiation "
                                    f"({q_rad:.0f} W/m² > {solar_threshold} W/m²). Heating may be unnecessary.")

            # Ventilation for humidity control should only be used if outside air is drier
            if control_action == 'uV' and 'humidity' in driver_text:
                # Check if we have both outdoor and indoor humidity data
                h_out_col = self.node_to_column.get('Hout')
                h_in_col = self.node_to_column.get('H', 'relhum_ref')

                if h_out_col and h_out_col in data_row.index and h_in_col in data_row.index:
                    h_out = data_row[h_out_col]
                    h_in = data_row[h_in_col]
                    if not (pd.isna(h_out) or pd.isna(h_in)) and h_out >= h_in:
                        issues.append(f"CONTRADICTION: Ventilation claimed for humidity reduction, but outside humidity "
                                    f"({h_out:.1f}%) >= indoor humidity ({h_in:.1f}%). Ventilation would increase humidity.")
        except Exception as e:
            self.logger.error(f"Error in environmental context check: {e}")
            issues.append(f"ERROR: Could not perform environmental context validation due to: {e}")

        return issues

    def _check_constraint_consistency(self, control_action: str, causal_driver_type: str, timestamp: datetime) -> List[str]:
        """
        Check if claimed causal driver has active Lagrangian multiplier.

        Only the first driver phrase found in ``causal_driver_type`` is
        examined. A missing or non-string driver type makes no claim and
        therefore yields no issues.
        """
        issues = []
        if not isinstance(causal_driver_type, str):
            return issues

        driver_text = causal_driver_type.lower()

        # Check if any constraint driver is mentioned
        for driver_keyword, lagrangian_node in self._DRIVER_TO_LAGRANGIAN.items():
            if driver_keyword not in driver_text:
                continue
            # Check if this Lagrangian is active
            if not self._check_lagrangian_active(lagrangian_node, timestamp):
                issues.append(f"INCONSISTENCY: Explanation claims '{driver_keyword}' drove the action, "
                            f"but {lagrangian_node} Lagrangian is inactive (near zero) at this time.")
            break

        return issues

    def _check_lagrangian_active(self, lagrangian_node: str, timestamp: datetime) -> bool:
        """
        Check if a Lagrangian multiplier is active (above threshold).

        The absolute value at the nearest data row is compared with
        ``config['lagrangian_active_threshold'][lagrangian_node]``
        (default 1e-6). Missing data, a missing column, NaN, or any error
        count as inactive.
        """
        try:
            data_row = self._get_data_at_timestamp(timestamp)
            if data_row is None:
                self.logger.warning(f"No data available at timestamp {timestamp} for Lagrangian check")
                return False

            column_name = self.node_to_column.get(lagrangian_node)
            if not column_name or column_name not in data_row.index:
                self.logger.warning(f"Lagrangian column '{column_name}' not found in data for node '{lagrangian_node}'")
                return False

            value = data_row[column_name]
            if pd.isna(value):
                self.logger.warning(f"NaN value for Lagrangian {lagrangian_node} at {timestamp}")
                return False

            threshold = self.config.get('lagrangian_active_threshold', {}).get(lagrangian_node, 1e-6)
            return abs(value) > threshold
        except KeyError as e:
            self.logger.error(f"KeyError accessing Lagrangian data for {lagrangian_node}: {e}")
            return False
        except Exception as e:
            self.logger.error(f"Unexpected error checking Lagrangian {lagrangian_node} at {timestamp}: {e}")
            return False

    def _generate_recommendations(self, issues: List[str], hca_result: Dict[str, Any]) -> List[str]:
        """
        Generate recommendations based on validation issues.

        Each issue contributes recommendations according to the first marker
        it contains (CONTRADICTION, WARNING, INCONSISTENCY). When nothing
        matched, two generic data-quality recommendations are returned.
        """
        recommendations = []

        # A missing control action is treated as an empty name so the
        # substring tests below cannot fail on None.
        control_action = hca_result.get('control_action_node') or ''

        for issue in issues:
            if 'CONTRADICTION' in issue:
                if 'uV' in control_action and 'H' in issue:
                    recommendations.append("Consider alternative explanations: temperature control, CO2 control, or system-wide event")
                    recommendations.append("Check for plant transpiration surge or compensatory cooling effects")
                elif 'uQh' in control_action or 'uQc' in control_action:
                    recommendations.append("Verify sensor readings and environmental conditions")
                    recommendations.append("Check for setpoint changes or system overrides")

            elif 'WARNING' in issue:
                recommendations.append("Review environmental conditions and control strategy appropriateness")

            elif 'INCONSISTENCY' in issue:
                recommendations.append("Examine alternative causal pathways that don't rely on inactive constraints")
                recommendations.append("Consider physics-based or setpoint-driven explanations instead")

        if not recommendations:
            recommendations.append("Investigate data quality and sensor calibration")
            recommendations.append("Review knowledge graph relationships for this control action")

        return recommendations

    def predict_effect_direction(self, control_action: str, state_variable: str, data_row: pd.Series) -> Dict[str, Any]:
        """
        Predict expected direction of effect for a control action on a state variable.

        Args:
            control_action: Control node (e.g., 'uV')
            state_variable: State node (e.g., 'H')
            data_row: Current data row with all sensor values

        Returns:
            Dict with 'direction', 'explanation' and 'confidence'. Direction
            is None (confidence 0.0) when no rule exists for the pair or the
            rule could not be evaluated on ``data_row``.
        """
        expected_direction = self._get_expected_direction(control_action, state_variable, data_row)

        if expected_direction is None:
            if (control_action, state_variable) in self.causality_rules:
                # A rule exists but the data it compares was missing or NaN.
                explanation = (
                    f"Expected direction for {control_action} -> {state_variable} "
                    f"could not be determined from the available data"
                )
            else:
                explanation = f"No causality rule defined for {control_action} -> {state_variable}"
            return {
                'direction': None,
                'explanation': explanation,
                'confidence': 0.0
            }

        explanation = self._get_effect_explanation(control_action, state_variable, data_row, expected_direction)

        return {
            'direction': expected_direction,
            'explanation': explanation,
            'confidence': 0.9  # High confidence for physics-based rules
        }

    def _get_effect_explanation(self, control_action: str, state_variable: str, data_row: pd.Series, direction: int) -> str:
        """
        Generate human-readable explanation for expected effect.

        Ventilation explanations quote the outdoor and indoor readings;
        heating, cooling and CO2 injection use fixed sentences; any other
        pair gets a generic sentence built from ``direction``.
        """
        direction_word = "increase" if direction == 1 else "decrease"

        if control_action == 'uV' and state_variable in self._VENTILATION_PAIRS:
            in_default, out_node, out_default, label, unit = self._VENTILATION_PAIRS[state_variable]
            inside = data_row.get(self.node_to_column.get(state_variable, in_default), 'N/A')
            outside = data_row.get(self.node_to_column.get(out_node, out_default), 'N/A')
            comparison = 'higher' if direction == 1 else 'lower'
            return (
                f"Ventilation should {direction_word} {label}. "
                f"Outside {label} ({outside}{unit}) is {comparison} than indoor ({inside}{unit})."
            )

        fixed = self._FIXED_EFFECT_EXPLANATIONS.get((control_action, state_variable))
        if fixed is not None:
            return fixed

        return f"{control_action} should {direction_word} {state_variable} based on physical principles."


class ValidationLogger:
    """
    Logs and analyzes validation results to improve system performance over time.

    Every call to ``log_validation`` appends one dict to ``validation_history``
    (in memory only); ``get_validation_stats`` and ``suggest_improvements``
    aggregate over that list.
    """

    def __init__(self):
        self.validation_history = []
        self.logger = logging.getLogger(__name__)

    def log_validation(self, query: str, control_action: str, validation_result: Dict[str, Any],
                      final_explanation: Optional[str] = None):
        """
        Log a validation event.

        Args:
            query: Original user query
            control_action: Control action that was analyzed
            validation_result: Result from PhysicalPlausibilityValidator; missing
                keys fall back to conservative defaults (invalid, 0.0 confidence,
                no issues/recommendations, no user query).
            final_explanation: Final explanation given to user (after any corrections)
        """
        log_entry = {
            'timestamp': datetime.now(),
            'query': query,
            'control_action': control_action,
            'is_valid': validation_result.get('is_valid', False),
            'confidence': validation_result.get('confidence', 0.0),
            'issues': validation_result.get('issues', []),
            'recommendations': validation_result.get('recommendations', []),
            'should_query_user': validation_result.get('should_query_user', False),
            'final_explanation': final_explanation
        }

        self.validation_history.append(log_entry)
        # Lazy %-args: the message is only formatted if INFO is actually emitted.
        self.logger.info(
            "Validation logged: %s - Valid: %s - Confidence: %.2f",
            control_action, log_entry['is_valid'], log_entry['confidence'],
        )

    def get_validation_stats(self) -> Dict[str, Any]:
        """
        Calculate statistics about validation performance.

        Returns:
            ``{'total_validations': 0}`` when nothing has been logged yet;
            otherwise a dict with ``total_validations``, ``valid_rate``,
            ``average_confidence``, ``issue_counts`` (keyed by the text before
            the first ':' of each issue, or 'OTHER'),
            ``control_actions_with_most_issues`` (top 5 ``(control, count)``
            pairs) and ``systematic_issues`` (issue types whose count exceeds
            10% of the number of validations).
        """
        history = self.validation_history
        if not history:
            return {'total_validations': 0}

        total = len(history)
        valid_count = sum(1 for entry in history if entry['is_valid'])
        avg_confidence = sum(entry['confidence'] for entry in history) / total

        issue_counts = {}     # issue type -> occurrences
        control_issues = {}   # control action -> number of issues raised against it
        for entry in history:
            issues = entry['issues']
            for issue in issues:
                issue_type = issue.split(':')[0] if ':' in issue else 'OTHER'
                issue_counts[issue_type] = issue_counts.get(issue_type, 0) + 1
            if issues:
                control = entry['control_action']
                control_issues[control] = control_issues.get(control, 0) + len(issues)

        return {
            'total_validations': total,
            'valid_rate': valid_count / total,
            'average_confidence': avg_confidence,
            'issue_counts': issue_counts,
            'control_actions_with_most_issues': sorted(control_issues.items(), key=lambda x: x[1], reverse=True)[:5],
            'systematic_issues': [k for k, v in issue_counts.items() if v / total > 0.1]  # Issues affecting >10% of validations
        }

    def suggest_improvements(self) -> List[str]:
        """
        Suggest improvements based on validation history.

        Returns:
            A list of human-readable suggestions; empty when no validation has
            been logged yet (there is nothing to analyze).
        """
        stats = self.get_validation_stats()
        # With an empty history the stats dict carries only 'total_validations',
        # so the rate/confidence keys below would raise KeyError.
        if not stats.get('total_validations'):
            return []

        suggestions = []

        if stats['valid_rate'] < 0.8:
            suggestions.append(f"Overall validation rate is {stats['valid_rate']:.1%}. Consider reviewing causality rules and thresholds.")

        if stats['average_confidence'] < 0.7:
            suggestions.append(f"Average confidence is {stats['average_confidence']:.2f}. Validation may be too strict or rules incomplete.")

        # Check for control actions with high issue rates
        for control, issue_count in stats.get('control_actions_with_most_issues', []):
            if issue_count > 5:  # Arbitrary threshold
                suggestions.append(f"{control} has {issue_count} validation issues. Review causality rules for this control.")

        # Check for systematic issues
        for issue_type in stats.get('systematic_issues', []):
            if 'CONTRADICTION' in issue_type:
                suggestions.append("High rate of direction contradictions. Review causality rules in PhysicalPlausibilityValidator.")
            elif 'WARNING' in issue_type:
                suggestions.append("High rate of environmental warnings. Consider updating control strategy logic.")
            elif 'INCONSISTENCY' in issue_type:
                suggestions.append("High rate of constraint inconsistencies. Review Lagrangian threshold settings.")

        return suggestions


class KnowledgeGraph:
    def __init__(self):
        """Create the directed graph, populate it, then record the control and state node IDs."""
        graph = nx.DiGraph()
        self.G = graph
        # build_graph() fills self.G in place with all nodes and edges.
        self.build_graph()
        # Node-ID sets used for membership checks (see is_control_action).
        self.control_action_nodes, self.state_variables = (
            {'uV', 'uC', 'uQh', 'uQc'},
            {'T', 'C', 'H', 'B'},
        )

    def build_graph(self):
        """Build the greenhouse knowledge graph with nodes and relationships.

        Populates ``self.G`` in place. Nodes carry a ``type`` attribute
        ('State', 'Control', 'Disturbance', 'Flux', 'Process', 'Reference',
        'Constraint', 'LagrangianIneq', 'LagrangianEq') plus optional
        ``name``/``unit``/``description``/``value``/``equation``. Edges carry a
        ``relationship`` label ('+', '-', '+/-', '∩', 'Activates', 'Limits',
        'MayActivate', 'MayLimit', 'Scales', 'Constrains', 'EnforcesDynamics')
        plus optional ``description``/``equation``.

        Several nodes and edges are added more than once below. In networkx a
        repeated ``add_node``/``add_edge`` updates the existing attribute dict,
        so the later call overwrites same-named attributes and keeps the rest.
        Statement order therefore matters.
        """
        # State variables (internal climate)
        self.G.add_node('T', name='Temperature', unit='°C', description='Inside air temperature', type='State')
        self.G.add_node('C', name='CO2 Concentration', unit='ppm', description='Inside CO2 concentration', type='State')
        self.G.add_node('H', name='Humidity', unit='%', description='Inside relative humidity', type='State')
        self.G.add_node('B', name='Biomass', unit='kg/m²', description='Crop biomass', type='State')

        # Control inputs (actuators)
        self.G.add_node('uV', name='Ventilation Rate', unit='m³/s', description='Ventilation rate control', type='Control')
        self.G.add_node('uC', name='CO2 Injection', unit='kg/s', description='CO2 injection rate', type='Control')
        self.G.add_node('uQh', name='Heating', unit='W/m²', description='Heating power', type='Control')
        self.G.add_node('uQc', name='Cooling', unit='W/m²', description='Cooling power', type='Control')

        # External variables
        self.G.add_node('Tout', name='Outside Temperature', unit='°C', description='Outside air temperature', type='Disturbance')
        self.G.add_node('Cout', name='Outside CO2', unit='ppm', description='Outside CO2 concentration', type='Disturbance')
        self.G.add_node('Hout', name='Outside Humidity', unit='%', description='Outside relative humidity', type='Disturbance')
        self.G.add_node('Qrad', name='Solar Radiation', unit='W/m²', description='Solar radiation', type='Disturbance')

        # Energy flows
        self.G.add_node('Q_vent', name='Ventilation Heat Flow', unit='W/m²', description='Heat flow due to ventilation', type='Flux')
        self.G.add_node('Q_sun', name='Solar Heat Flow', unit='W/m²', description='Heat flow from solar radiation', type='Flux')
        self.G.add_node('Q_cov', name='Cover Heat Flow', unit='W/m²', description='Heat flow through greenhouse cover', type='Flux')
        self.G.add_node('Q_trans', name='Transpiration Heat Flow', unit='W/m²', description='Heat flow from plant transpiration', type='Flux')
        self.G.add_node('Q_heat', name='Heating Flow', unit='W/m²', description='Heat flow from heating system', type='Flux')
        self.G.add_node('Q_cool', name='Cooling Flow', unit='W/m²', description='Heat flow from cooling system', type='Flux')

        # CO2 flows
        self.G.add_node('C_inj', name='CO2 Injection Flow', unit='mg/m²/s', description='CO2 flow from injection', type='Flux')
        self.G.add_node('C_vent', name='CO2 Ventilation Flow', unit='mg/m²/s', description='CO2 flow from ventilation', type='Flux')
        self.G.add_node('C_phot', name='CO2 Photosynthesis Flow', unit='mg/m²/s', description='CO2 consumption by photosynthesis', type='Process')

        # Humidity flows
        self.G.add_node('H_trans', name='Transpiration Humidity Flow', unit='kg/m²/s', description='Humidity from transpiration', type='Flux')
        self.G.add_node('H_vent', name='Ventilation Humidity Flow', unit='kg/m²/s', description='Humidity flow from ventilation', type='Flux')
        self.G.add_node('H_cov', name='Cover Humidity Flow', unit='kg/m²/s', description='Humidity flow through cover', type='Flux')
        self.G.add_node('H_cool', name='Cooling Humidity Flow', unit='kg/m²/s', description='Humidity flow from cooling', type='Flux')

        # Reference variables (for control)
        self.G.add_node('T_ref', name='Temperature Reference', unit='°C', description='Temperature setpoint', type='Reference')
        self.G.add_node('C_ref', name='CO2 Reference', unit='ppm', description='CO2 concentration setpoint', type='Reference')
        self.G.add_node('H_ref', name='Humidity Reference', unit='%', description='Relative humidity setpoint', type='Reference')
        self.G.add_node('B_ref', name='Biomass Reference', unit='kg/m²', description='Biomass target', type='Reference')

        # Reference nodes for the control inputs. No edges are attached to
        # these *_ref control nodes anywhere in this method.
        self.G.add_node('uV_ref', name='Ventilation Rate Reference', unit='m³/s', description='Ventilation rate setpoint', type='Reference')
        self.G.add_node('uC_ref', name='CO2 Injection Reference', unit='kg/s', description='CO2 injection rate setpoint', type='Reference')
        self.G.add_node('uQh_ref', name='Heating Reference', unit='W/m²', description='Heating power setpoint', type='Reference')
        self.G.add_node('uQc_ref', name='Cooling Reference', unit='W/m²', description='Cooling power setpoint', type='Reference')


        # Connect state variables to their references (edge direction: reference -> state)
        for state_var, ref_var in [('T', 'T_ref'), ('C', 'C_ref'), ('H', 'H_ref'), ('B', 'B_ref')]:
            self.G.add_edge(
                ref_var, state_var,
                relationship='+',
                description=f"Reference setpoint for {self.G.nodes[state_var]['name']}")


        # Temperature influences
        self.G.add_edge('Q_sun', 'T', relationship='+', description='Solar radiation increases temperature')
        self.G.add_edge('Q_heat', 'T', relationship='+', description='Heating increases temperature')
        self.G.add_edge('Q_vent', 'T', relationship='-', description='Ventilation typically decreases temperature')
        self.G.add_edge('Q_cov', 'T', relationship='-', description='Heat loss through cover decreases temperature')
        self.G.add_edge('Q_trans', 'T', relationship='-', description='Transpiration decreases temperature')
        self.G.add_edge('Q_cool', 'T', relationship='-', description='Cooling decreases temperature')
        self.G.add_edge('Tout', 'T', relationship='+/-',
                        description='Outside temperature affects inside temperature; warmer outside increases inside temp')

        # CO2 influences
        self.G.add_edge('C_inj', 'C', relationship='+', description='CO2 injection increases CO2 concentration')
        self.G.add_edge('C_vent', 'C', relationship='-', description='CO2 ventilation flux decreases inside CO2 concentration (assuming Cout < C)')
        self.G.add_edge('C_phot', 'C', relationship='-', description='Photosynthesis consumes CO2')
        self.G.add_edge('Cout', 'C', relationship='+/-',
                        description='Outside CO2 affects inside CO2 through ventilation')

        # Humidity influences
        self.G.add_edge('H_trans', 'H', relationship='+', description='Transpiration increases humidity')
        self.G.add_edge('H_vent', 'H', relationship='+/-',
                        description='Ventilation equalizes inside and outside humidity. Effect depends on humidity difference')
        self.G.add_edge('H_cov', 'H', relationship='-', description='Condensation on cover decreases humidity')
        self.G.add_edge('H_cool', 'H', relationship='-', description='Cooling decreases humidity through condensation')
        self.G.add_edge('Hout', 'H', relationship='+/-',
                        description='Outside humidity affects inside humidity through ventilation')

        # Biomass influences
        self.G.add_edge('C', 'B', relationship='+',
                        description='Higher CO2 concentration increases photosynthesis and biomass production')
        self.G.add_edge('T', 'B', relationship='∩',
                        description='Temperature has optimal effect on biomass growth with diminishing returns outside optimal range')
        self.G.add_edge('Qrad', 'B', relationship='+',
                        description='Solar radiation increases photosynthesis and biomass production')

        # Common nodes
        # NOTE: Qrad/Tout/Cout/Hout were already added under "External variables".
        # Re-adding them here replaces their 'description' (and re-sets the same
        # 'type'); the earlier 'name' and 'unit' attributes are kept.
        self.G.add_node("Qrad", type="Disturbance", description="Solar Radiation")
        self.G.add_node("Tout", type="Disturbance", description="Outdoor Temperature")
        self.G.add_node("Cout", type="Disturbance", description="Outdoor CO2")
        self.G.add_node("Hout", type="Disturbance", description="Outdoor Humidity")
        self.G.add_node("ϕ_CO2", type="Process", equation="ϕ_CO2 = CG4(Qrad, C, T, Ha)")

        # Biomass connections
        # NOTE: 'B' already exists; this replaces its description 'Crop biomass' with 'Biomass'.
        self.G.add_node("B", type="State", description="Biomass")
        self.G.add_edge("ϕ_CO2", "B", relationship="+", equation="(4)")
        # NOTE(review): 'kB_CO2' is never passed to add_node in this method, so this
        # edge creates the node implicitly with no attributes; type="Parameter" is
        # stored on the edge, not on the node. Confirm whether a node type was intended.
        self.G.add_edge("kB_CO2", "B", type="Parameter", relationship="Scales")

        # Soft constraints
        self.G.add_node("T_min", type="Constraint", value=18, description="Min Temperature (soft)")
        self.G.add_node("T_max", type="Constraint", value=26, description="Max Temperature (soft)")
        self.G.add_node("C_min", type="Constraint", value=500, description="Min CO2 (soft)")
        self.G.add_node("C_max", type="Constraint", value=900, description="Max CO2 (soft)")
        self.G.add_node("H_min", type="Constraint", value=60, description="Min Humidity (soft)")
        self.G.add_node("H_max", type="Constraint", value=90, description="Max Humidity (soft)")
        self.G.add_node("X_hard", type="Constraint", equation="X=[14,30]×[300,1000]×[10,100]")

        # Constraint relationships (constraint node -> control it triggers or limits)
        self.G.add_edge("T_min", "uQh", relationship="Activates")
        self.G.add_edge("T_max", "uQc", relationship="Activates")
        self.G.add_edge("C_min", "uC", relationship="Activates")
        self.G.add_edge("C_max", "uC", relationship="Limits")
        self.G.add_edge("H_max", "uV", relationship="MayActivate")
        self.G.add_edge("H_max", "uQc", relationship="MayActivate")
        self.G.add_edge("H_min", "uV", relationship="MayLimit")

        # Control to flows
        self.G.add_edge('uV', 'Q_vent', relationship='+', description='Ventilation control increases ventilation heat flow')
        self.G.add_edge('uV', 'C_vent', relationship='+', description='Ventilation control increases CO2 ventilation flow')
        self.G.add_edge('uV', 'H_vent', relationship='+', description='Ventilation control increases humidity ventilation flow')
        self.G.add_edge('uC', 'C_inj', relationship='+', description='CO2 injection control increases CO2 injection flow')
        self.G.add_edge('uQh', 'Q_heat', relationship='+', description='Heating control increases heating flow')
        self.G.add_edge('uQc', 'Q_cool', relationship='+', description='Cooling control increases cooling flow')
        self.G.add_edge('uQc', 'H_cool', relationship='+', description='Cooling control increases dehumidification')

        # Environmental influences on flows
        self.G.add_edge('Qrad', 'Q_sun', relationship='+', description='Solar radiation increases solar heat flow')
        self.G.add_edge('Qrad', 'C_phot', relationship='+', description='Solar radiation increases photosynthesis')
        self.G.add_edge('Qrad', 'H_trans', relationship='+', description='Solar radiation increases transpiration')
        self.G.add_edge('Tout', 'Q_vent', relationship='+/-',
                        description='Outside temperature affects ventilation heat flow; direction depends on inside-outside difference')
        self.G.add_edge('Tout', 'Q_cov', relationship='+/-',
                        description='Outside temperature affects cover heat flow; direction depends on inside-outside difference')
        self.G.add_edge('Hout', 'H_vent', relationship='+/-',
                        description='Outside humidity affects ventilation humidity flow')
        self.G.add_edge('Cout', 'C_vent', relationship='+/-',
                        description='Outside CO2 affects ventilation CO2 flow')

        # Cross-variable effects
        self.G.add_edge('T', 'H', relationship='-', description='Higher temperature decreases relative humidity')
        self.G.add_edge('T', 'C_phot', relationship='∩',
                        description='Temperature has optimal effect on photosynthesis with diminishing returns outside optimal range')
        self.G.add_edge('H', 'H_trans', relationship='-', description='Higher humidity decreases transpiration')
        # NOTE(review): 'Hsat' is never passed to add_node in this method, so it is
        # created implicitly by this edge and carries no node attributes.
        self.G.add_edge("T", "Hsat", relationship="+",
                        equation="Hsat = 5.5638 e^(0.0572T)",
                        description="Temperature exponentially increases saturation humidity capacity")

        # Temperature affects condensation on cover
        self.G.add_edge("T", "H_cov", relationship="+",
                        equation="H_cov = gC*(0.2522e^(0.0485T)(T-Tout) - (Hsat - Ha))",
                        description="Higher temperature difference increases condensation potential")

        # Temperature affects crop transpiration via vapor pressure deficit
        self.G.add_edge("T", "H_trans", relationship="+",
                        equation="H_trans = ge*(Hv - Ha)",
                        description="Temperature increases vapor pressure in leaves, driving transpiration")

        # Temperature affects cooling dehumidification
        self.G.add_edge("T", "H_cool", relationship="+",
                        equation="H_cool = max(0, uQc*kA_pipe*kcool*(Ha - Hsat_cool))",
                        description="Temperature affects dehumidification through cooling pipes")

        # Temperature affects photosynthesis rate (bell curve relationship)
        self.G.add_edge("T", "ϕ_CO2", relationship="∩",
                        equation="ϕ_CO2 = CG4(Qrad, C, T, Ha)",
                        description="Temperature has optimal range for photosynthesis (too low or too high reduces rate)")

        # Temperature affects CO2 solubility in cell fluids
        # NOTE: the T -> C_phot edge already exists (added under "Cross-variable
        # effects"); this call replaces its description and adds an equation.
        self.G.add_edge("T", "C_phot", relationship="∩",
                        equation="C_phot = ϕ_CO2*1e-3/kA_gh",
                        description="Temperature affects enzyme activity and CO2 solubility")

        # Humidity affects transpiration cooling
        self.G.add_edge("H", "Q_trans", relationship="-",
                        equation="Q_trans = ge*kL*(Hv - Ha)",
                        description="Higher humidity reduces transpiration cooling effect")

        # Humidity affects condensation heat release
        self.G.add_edge("H", "Q_cov", relationship="-",
                        equation="Part of Q_cov calculation",
                        description="Higher humidity increases condensation on cover, releasing latent heat")

        # Humidity affects stomatal conductance which impacts CO2 uptake
        self.G.add_edge("H", "ϕ_CO2", relationship="∩",
                        equation="Part of CG4 model",
                        description="Optimal humidity range for stomatal opening; extreme values reduce photosynthesis")

        # CO2 affects leaf temperature via stomatal conductance
        self.G.add_edge("C", "Q_trans", relationship="-",
                        equation="Indirect through stomatal response",
                        description="Higher CO2 can reduce stomatal conductance, affecting transpiration and leaf temp")


        # Solar radiation affects multiple processes
        # NOTE: Qrad -> Q_sun and Qrad -> H_trans were already added under
        # "Environmental influences on flows"; the calls below replace their
        # descriptions and add an equation. Qrad -> ϕ_CO2 is new here.
        self.G.add_edge("Qrad", "Q_sun", relationship="+",
                        equation="Q_sun = ktot*Qrad",
                        description="Direct heating effect")

        self.G.add_edge("Qrad", "ϕ_CO2", relationship="+",
                        equation="Part of CG4 model",
                        description="Light drives photosynthesis")

        self.G.add_edge("Qrad", "H_trans", relationship="+",
                        equation="Indirect through Rn calculation",
                        description="Radiation increases leaf temperature driving transpiration")

        # Ventilation affects all three state variables
        # NOTE: these three uV edges already exist (added under "Control to flows");
        # the calls below replace their descriptions and add equations.
        self.G.add_edge("uV", "Q_vent", relationship="+",
                        equation="Q_vent = kc_air*kρ_air*ku_vent*uV*(Tout - T)",
                        description="Ventilation exchanges heat")

        self.G.add_edge("uV", "C_vent", relationship="+",
                        equation="C_vent = ku_vent*uV*(Cout - C)",
                        description="Ventilation exchanges CO2")

        self.G.add_edge("uV", "H_vent", relationship="+",
                        equation="H_vent = ku_vent*uV*(Ha - Hout)",
                        description="Ventilation exchanges humidity")

        # Cooling affects temperature and humidity
        # NOTE: both uQc edges already exist (added under "Control to flows");
        # the calls below replace their descriptions and add equations.
        self.G.add_edge("uQc", "Q_cool", relationship="+",
                        equation="Q_cool = uQc*Pc_max",
                        description="Cooling reduces temperature")

        self.G.add_edge("uQc", "H_cool", relationship="+",
                        equation="H_cool = max(0, uQc*kA_pipe*kcool*(Ha - Hsat_cool))",
                        description="Cooling causes dehumidification")

        # NOTE: the string literal below is a bare expression statement in the middle
        # of the function (not a docstring); it has no effect at runtime.
        """Initialize nodes representing Lagrangian multipliers"""
        # Inequality constraints Lagrangian multipliers
        self.G.add_node("T_ieq", type="LagrangianIneq",
                        description="Temperature inequality constraint multiplier")
        self.G.add_node("C_ieq", type="LagrangianIneq",
                        description="CO2 inequality constraint multiplier")
        self.G.add_node("H_ieq", type="LagrangianIneq",
                        description="Humidity inequality constraint multiplier")
        self.G.add_node("B_ieq", type="LagrangianIneq",
                        description="Biomass inequality constraint multiplier")
        self.G.add_node("uV_ieq", type="LagrangianIneq",
                        description="Ventilation control inequality constraint multiplier")
        self.G.add_node("uC_ieq", type="LagrangianIneq",
                        description="CO2 injection control inequality constraint multiplier")
        self.G.add_node("uQh_ieq", type="LagrangianIneq",
                        description="Heating control inequality constraint multiplier")
        self.G.add_node("uQc_ieq", type="LagrangianIneq",
                        description="Cooling control inequality constraint multiplier")

        # Equality constraints Lagrangian multipliers
        self.G.add_node("T_eq", type="LagrangianEq",
                        description="Temperature dynamics equality constraint multiplier")
        self.G.add_node("C_eq", type="LagrangianEq",
                        description="CO2 dynamics equality constraint multiplier")
        self.G.add_node("H_eq", type="LagrangianEq",
                        description="Humidity dynamics equality constraint multiplier")
        self.G.add_node("B_eq", type="LagrangianEq",
                        description="Biomass dynamics equality constraint multiplier")

        # Connect inequality constraints to state bounds
        self.G.add_edge("T_ieq", "T", relationship="Constrains",
                        description="Active when temperature approaches bounds")
        self.G.add_edge("C_ieq", "C", relationship="Constrains",
                        description="Active when CO2 approaches bounds")
        self.G.add_edge("H_ieq", "H", relationship="Constrains",
                        description="Active when humidity approaches bounds")
        self.G.add_edge("B_ieq", "B", relationship="Constrains",
                        description="Active when biomass approaches bounds")

        # Connect inequality constraints to control bounds
        self.G.add_edge("uV_ieq", "uV", relationship="Constrains",
                        description="Active when ventilation control approaches bounds")
        self.G.add_edge("uC_ieq", "uC", relationship="Constrains",
                        description="Active when CO2 injection control approaches bounds")
        self.G.add_edge("uQh_ieq", "uQh", relationship="Constrains",
                        description="Active when heating control approaches bounds")
        self.G.add_edge("uQc_ieq", "uQc", relationship="Constrains",
                        description="Active when cooling control approaches bounds")

        # Connect equality constraints to state dynamics
        self.G.add_edge("T_eq", "T", relationship="EnforcesDynamics",
                        description="Enforces temperature dynamics equation")
        self.G.add_edge("C_eq", "C", relationship="EnforcesDynamics",
                        description="Enforces CO2 dynamics equation")
        self.G.add_edge("H_eq", "H", relationship="EnforcesDynamics",
                        description="Enforces humidity dynamics equation")
        self.G.add_edge("B_eq", "B", relationship="EnforcesDynamics",
                        description="Enforces biomass dynamics equation")

    def get_related_variables(self, variable: str, direction: str = 'both', depth: int = 1) -> list[str]:
        """Get variables related to the target variable in the knowledge graph.

        Args:
            variable: Node ID to start from.
            direction: 'in' returns upstream nodes (those with a directed path
                *to* ``variable``, i.e. variables that affect it); 'out' returns
                downstream nodes (those reachable *from* ``variable``, i.e.
                variables it affects); 'both' returns the union. Any other
                value yields an empty list.
            depth: Maximum number of edges to follow from ``variable``.

        Returns:
            Node IDs within ``depth`` hops in the requested direction, excluding
            ``variable`` itself, in arbitrary order. Empty if the node is
            unknown or ``depth`` is less than 1.

        Note:
            Earlier revisions traversed successors for 'in' and predecessors
            for 'out', contradicting their own comments; the two are now
            aligned with the documented meaning. 'both' is unaffected.
        """
        if not self.G.has_node(variable) or depth < 1:
            return []

        related = set()

        if direction in ('in', 'both'):
            # Variables that affect the target: follow edges backwards. A reverse
            # *view* avoids copying the whole graph on every call.
            upstream = nx.single_source_shortest_path(self.G.reverse(copy=False), variable, cutoff=depth)
            related.update(upstream)

        if direction in ('out', 'both'):
            # Variables affected by the target: follow edges forwards. One search
            # with cutoff=depth already covers every smaller depth.
            downstream = nx.single_source_shortest_path(self.G, variable, cutoff=depth)
            related.update(downstream)

        # The search results always contain the source node itself.
        related.discard(variable)
        return list(related)

    def find_path(self, start_node: str, end_node: str) -> Dict[str, Any]:
        """
        Find a meaningful causal relationship between two nodes.

        The relationship kinds are tried in this order:

        1. ``'direct'``: a directed path start -> end, or end -> start (the
           latter is returned reversed so it still begins at ``start_node``).
        2. ``'common_successor'``: both nodes reach a shared node (A -> C <- B).
        3. ``'common_predecessor'``: a shared node reaches both (A <- C -> B).

        Returns:
            A dict that always has ``'type'`` and ``'path'``. ``'path'`` is a
            node list running from ``start_node`` to ``end_node`` (through the
            common node for kinds 2 and 3, which also set ``'common_node'``).
            ``{'type': 'none', 'path': []}`` if either node is unknown or no
            relationship exists.
        """
        graph = self.G
        if not graph.has_node(start_node) or not graph.has_node(end_node):
            return {'type': 'none', 'path': []}

        # Case 1: Direct Path (A -> B or B -> A)
        try:
            forward = nx.shortest_path(graph, source=start_node, target=end_node)
        except nx.NetworkXNoPath:
            pass
        else:
            return {'type': 'direct', 'path': forward}

        try:
            backward = nx.shortest_path(graph, source=end_node, target=start_node)
        except nx.NetworkXNoPath:
            pass
        else:
            return {'type': 'direct', 'path': list(reversed(backward))}

        # Case 2: Common Successor (A -> C <- B)
        common_successors = nx.descendants(graph, start_node) & nx.descendants(graph, end_node)

        if common_successors:
            # Pick the shared descendant with the smallest combined distance.
            closest_successor = min(
                common_successors,
                key=lambda s: nx.shortest_path_length(graph, source=start_node, target=s) +
                              nx.shortest_path_length(graph, source=end_node, target=s))

            path1 = nx.shortest_path(graph, source=start_node, target=closest_successor)
            path2 = nx.shortest_path(graph, source=end_node, target=closest_successor)
            # start ... common, then the end-side path walked backwards without
            # repeating the common node: start ... common ... end
            representative_path = path1 + list(reversed(path2[:-1]))
            return {'type': 'common_successor', 'path': representative_path, 'common_node': closest_successor}

        # Case 3: Common Predecessor (A <- C -> B)
        reversed_G = graph.reverse(copy=False)
        common_predecessors = nx.descendants(reversed_G, start_node) & nx.descendants(reversed_G, end_node)

        if common_predecessors:
            closest_predecessor = min(
                common_predecessors,
                key=lambda p: nx.shortest_path_length(graph, source=p, target=start_node) +
                              nx.shortest_path_length(graph, source=p, target=end_node))

            path1 = nx.shortest_path(graph, source=closest_predecessor, target=start_node)
            path2 = nx.shortest_path(graph, source=closest_predecessor, target=end_node)
            # path1 runs common ... start, so reverse ALL of it to get
            # start ... common, then append path2 without its leading common
            # node. (Slicing path1[:-1] before reversing dropped start_node and
            # listed the common node twice.)
            representative_path = list(reversed(path1)) + path2[1:]
            return {'type': 'common_predecessor', 'path': representative_path, 'common_node': closest_predecessor}

        return {'type': 'none', 'path': []}

    def is_control_action(self,
                          node: str) -> bool:
        """Check if a given node is a control action"""
        return node in self.control_action_nodes

    def has_node(self, node):
        return node in self.G

    def in_edges(self, node):
        """Return the edges pointing into ``node``, each with its attribute data."""
        incoming = self.G.in_edges(node, data=True)
        return incoming

    def out_edges(self, node):
        """Return the edges leaving ``node``, each with its attribute data."""
        outgoing = self.G.out_edges(node, data=True)
        return outgoing

    def get_edge_data(self, u, v):
        """Delegate to ``self.G.get_edge_data`` for the edge from ``u`` to ``v``."""
        edge_attrs = self.G.get_edge_data(u, v)
        return edge_attrs

    def get_corrective_actions_for_state(self, state_node: str, direction: str = 'decrease') -> list[str]:
        """
        Find control actions that are used to either increase or decrease a state variable.

        e.g., get_corrective_actions_for_state('T', 'decrease') -> ['uQc', 'uV']

        Args:
            state_node: State variable ID; only 'T', 'H' and 'C' have entries.
            direction: 'increase' or 'decrease'.

        Returns:
            A fresh list of control-action IDs; empty when the state or the
            direction is not in the map.
        """
        # This logic could be derived from the KG edges, but a fixed map is faster
        # and more reliable. The map is rebuilt per call, so callers may mutate
        # the returned list safely.
        # Map of {State: {'increase': [...], 'decrease': [...]}}
        action_map = {
            'T': {'increase': ['uQh'], 'decrease': ['uQc', 'uV']},
            'H': {'increase': [], 'decrease': ['uV', 'uQc']},
            'C': {'increase': ['uC'], 'decrease': ['uV']},
        }

        return action_map.get(state_node, {}).get(direction, [])
    
    def visualize_knowledge_graph(self, focus_node=None, depth=None, layout_prog='neato', figsize=(22, 18), output_filename=None):
        """
        Visualize the knowledge graph (or the neighbourhood of one node) with Matplotlib.

        Args:
            focus_node (str, optional): Node ID to center the visualization on. Defaults to None (full graph).
            depth (int, optional): How many steps away from the focus_node to include; edges are followed
                                   in both directions. Must be a positive int whenever focus_node is given.
                                   Defaults to None.
            layout_prog (str, optional): Layout program for graphviz ('dot', 'neato', 'fdp', 'sfdp', 'twopi', 'circo')
                                         or one of the NetworkX layouts 'spring', 'kamada_kawai', 'spectral',
                                         'circular', 'shell', 'random'. Defaults to 'neato'. 'dot' is good for hierarchies.
            figsize (tuple, optional): Figure size. Defaults to (22, 18).
            output_filename (str, optional): If provided, saves the figure to this path. Defaults to None (displays plot).

        Returns:
            matplotlib.figure.Figure: The figure object containing the plot, or None when focus_node is
            unknown, depth is invalid, the graph to draw is empty, or saving the figure failed.
        """
        # Validate the request before touching Matplotlib so that a bad call
        # neither creates a throw-away figure nor changes the global plot style.
        if focus_node:
            if focus_node not in self.G:
                print(f"Warning: focus node '{focus_node}' not found in the graph.")
                return None
            if depth is None or not isinstance(depth, int) or depth <= 0:
                print("Warning: a positive integer depth is required when focus_node is given.")
                return None

        # Imported here so the legend does not rely on Patch being imported at module level.
        from matplotlib.patches import Patch

        plt.style.use('seaborn-v0_8-whitegrid') # Example style
        fig, ax = plt.subplots(figsize=figsize)
        target_graph = self.G # Use self.G here
        title = "Complete Greenhouse Knowledge Graph"

        # --- Subgraph Logic ---
        if focus_node:
            # Breadth-first expansion over predecessors and successors, one layer per depth step
            nodes_to_include = {focus_node}
            current_layer = {focus_node}
            for _ in range(depth):
                next_layer_nodes = set()
                for node in current_layer:
                    next_layer_nodes.update(self.G.successors(node))
                    next_layer_nodes.update(self.G.predecessors(node))

                new_nodes = next_layer_nodes - nodes_to_include # Only add nodes not already included
                if not new_nodes: # Stop if no new nodes are found
                    break
                nodes_to_include.update(new_nodes)
                current_layer = new_nodes # Next iteration starts from the newly added nodes

            target_graph = self.G.subgraph(nodes_to_include)
            title = f"Greenhouse Knowledge Graph around '{focus_node}' (Depth {depth})"

        if not target_graph.nodes():
            print("Warning: The graph (or subgraph) to visualize is empty.")
            plt.close(fig)
            return None

        # Fixed edge order, so edge_colors below lines up with the edge list one-to-one
        edges_to_draw = list(target_graph.edges())

        # --- Styling ---
        # Define node colors based on the 'type' attribute
        color_map = {
            "State": "#87CEEB",          # skyblue
            "Process": "#90EE90",        # lightgreen
            "Flux": "#98FB98",           # palegreen (slightly diff from Process)
            "Control": "#F08080",        # lightcoral
            "Constraint": "#FFA07A",     # lightsalmon
            "Disturbance": "#FFD700",    # gold
            "Parameter": "#D3D3D3",      # lightgrey
            "Reference": "#DA70D6",      # orchid
            "LagrangianIneq": "#FFA500", # orange
            "LagrangianEq": "#FFEC8B",   # lightgoldenrodyellow (distinct from orange)
            "TimeSeriesData": "#A9A9A9", # dimgrey
            "Default": "#E0E0E0"         # default grey
        }
        # Assign colors, falling back to Default if type is missing or unknown
        node_colors = [color_map.get(target_graph.nodes[n].get('type', 'Default'), color_map['Default'])
                       for n in target_graph.nodes()]
        node_sizes = [2500 if target_graph.nodes[n].get('type') == 'State' else 1800 for n in target_graph.nodes()]

        # Edge color encodes the sign of the relationship: '+' green, '-' red, anything else grey
        edge_colors = []
        for u, v in edges_to_draw:
            relationship = target_graph.edges[u, v].get('relationship')
            if relationship == '+':
                edge_colors.append('green')
            elif relationship == '-':
                edge_colors.append('red')
            else:
                edge_colors.append('grey')

        # --- Layout ---
        pos = None
        networkx_layouts = ['spring', 'kamada_kawai', 'spectral', 'circular', 'shell', 'random']
        if layout_prog in networkx_layouts:
            print(f"Using NetworkX layout '{layout_prog}'.")
            layout_func = getattr(nx, f"{layout_prog}_layout")
            pos = layout_func(target_graph)
        else: # Assume graphviz layout
            try:
                # Requires graphviz installed (OS package) and pygraphviz/pydot (pip package)
                pos = nx.nx_agraph.graphviz_layout(target_graph, prog=layout_prog)
                print(f"Using graphviz layout '{layout_prog}'.")
            except ImportError:
                print(f"Warning: pygraphviz/pydot not found. Install for graphviz layouts (like '{layout_prog}'). Falling back to NetworkX 'spring_layout'.")
                pos = nx.spring_layout(target_graph, k=0.6, iterations=50)
            except Exception as e:
                print(f"Warning: graphviz layout '{layout_prog}' failed ({e}). Falling back to NetworkX 'spring_layout'.")
                pos = nx.spring_layout(target_graph, k=0.6, iterations=50)

        # --- Drawing ---
        nx.draw_networkx_nodes(target_graph, pos, node_color=node_colors, node_size=node_sizes, alpha=0.9, ax=ax)
        if edges_to_draw:
            # One call for all edges; edge_color accepts one color per edge in edgelist order.
            # node_size is passed so arrow heads stop at the node borders.
            nx.draw_networkx_edges(target_graph, pos, edgelist=edges_to_draw,
                                   alpha=0.5, arrows=True, arrowstyle='-|>',
                                   arrowsize=15, edge_color=edge_colors,
                                   ax=ax, node_size=node_sizes)
        # Labels are drawn once, after the edges, so they stay on top
        nx.draw_networkx_labels(target_graph, pos, font_size=9, ax=ax)

        # Optional: Add edge labels (can make the graph very cluttered)
        # edge_attrs = nx.get_edge_attributes(target_graph, 'relationship')
        # if edge_attrs:
        #    nx.draw_networkx_edge_labels(target_graph, pos, edge_labels=edge_attrs, font_size=7, ax=ax)

        ax.set_title(title, fontsize=16)
        ax.axis('off')

        # --- Legend ---
        # Only node types that occur in the drawn graph and have a color_map entry are listed
        present_types = sorted({target_graph.nodes[n].get('type', 'Default') for n in target_graph.nodes()})
        active_legend_elements = [Patch(facecolor=color_map.get(label, color_map['Default']), edgecolor='dimgray', label=label)
                                  for label in present_types if label in color_map]

        if active_legend_elements:
            ax.legend(handles=active_legend_elements, loc='upper left', bbox_to_anchor=(1.01, 1), title="Node Types", fontsize=10, title_fontsize=12)

        fig.tight_layout(rect=[0, 0, 0.85, 1]) # Leave room on the right for the legend placed outside the axes

        # --- Output ---
        if output_filename:
            try:
                fig.savefig(output_filename, bbox_inches='tight', dpi=300)
                print(f"Graph saved to {output_filename}")
            except Exception as e:
                print(f"Error saving graph to {output_filename}: {e}")
                plt.close(fig)
                return None
        else:
            # If not saving, display the plot interactively
            # Make sure your environment supports interactive plotting (e.g., not a plain script run)
            print("Displaying graph...")
            plt.show()

        return fig

    # Interactive helpers: add proper methods here (ensures they belong to the class)
    def get_related_nodes_interactive(self, selected_node: str, depth: int = 2) -> List[str]:
        """Collect `selected_node` plus every node reachable within `depth` hops.

        Edges are followed in both directions (successors and predecessors).
        An unknown `selected_node` yields an empty list. The order of the
        returned list is not defined because it is built from a set.

        NOTE(review): a later method in this class reuses this name with a
        different signature and return type; the definition that comes last
        is the one the class ends up exposing.
        """
        graph = self.G
        if selected_node not in graph:
            return []

        def _adjacent(node):
            # Both edge directions; if that fails, fall back to plain neighbors when available
            try:
                return set(graph.successors(node)) | set(graph.predecessors(node))
            except Exception:
                if hasattr(graph, 'neighbors'):
                    return set(graph.neighbors(node))
                return set()

        reached = {selected_node}
        wave = {selected_node}
        for _hop in range(depth):
            # Expand the whole current wave at once; the next wave is everything it touches
            wave = set().union(*(_adjacent(node) for node in wave))
            reached |= wave

        return list(reached)

    def create_interactive_kg_plot(self, focus_node: Optional[str] = None, depth: int = 2):
        """Create a basic Plotly interactive knowledge graph.

        Args:
            focus_node: Optional node ID. When given, only nodes within `depth`
                hops of it (following edges in both directions) are shown.
            depth: Number of hops to include around `focus_node`.

        Returns:
            A Plotly Figure, or None when `focus_node` is not in the graph, the
            graph to draw has no nodes, or any error occurs while building it
            (the error is logged).

        NOTE(review): another `create_interactive_kg_plot` is defined later in
        this class; the definition that comes last is the one the class exposes.
        """
        try:
            target_graph = self.G
            title = "Interactive Greenhouse Knowledge Graph"

            if focus_node:
                if focus_node not in target_graph:
                    return None
                # The neighbourhood is collected inline rather than through
                # get_related_nodes_interactive, because that name is also bound to a
                # one-argument method returning a dict, which cannot serve as a node list.
                nodes_to_include = {focus_node}
                frontier = {focus_node}
                for _ in range(depth):
                    frontier = {
                        neighbor
                        for node in frontier
                        for neighbor in (*self.G.successors(node), *self.G.predecessors(node))
                    } - nodes_to_include
                    if not frontier:
                        break
                    nodes_to_include |= frontier
                target_graph = target_graph.subgraph(nodes_to_include).copy()
                title = f"Interactive KG around '{focus_node}' (depth {depth})"

            if not target_graph.nodes():
                return None

            # Fixed seed keeps the layout stable between calls
            pos = nx.spring_layout(target_graph, seed=42)

            # All edges share one line trace; None entries break the line between segments
            edge_x = []
            edge_y = []
            for u, v in target_graph.edges():
                x0, y0 = pos[u]
                x1, y1 = pos[v]
                edge_x += [x0, x1, None]
                edge_y += [y0, y1, None]

            edge_trace = go.Scatter(x=edge_x, y=edge_y, mode='lines', line=dict(width=1, color='#888'), hoverinfo='text')

            # simple palette; node types not listed here use the 'State' blue
            palette = {
                'Control': '#d62728', 'Flux': '#9467bd', 'Disturbance': '#17becf', 'State': '#1f77b4'
            }

            node_x = []
            node_y = []
            node_labels = []
            node_hover = []
            node_color = []
            for n, data in target_graph.nodes(data=True):
                x, y = pos[n]
                node_x.append(x)
                node_y.append(y)
                display_name = data.get('name', n)
                # Label and hover text are kept apart so a name that itself contains
                # " (" is not truncated when the label is derived.
                node_labels.append(str(display_name))
                node_hover.append(f"{display_name} ({n})")
                node_color.append(palette.get(data.get('type', 'Default'), '#1f77b4'))

            node_trace = go.Scatter(
                x=node_x, y=node_y,
                mode='markers+text',
                text=node_labels,
                textposition='top center',
                hovertext=node_hover,
                hoverinfo='text',
                marker=dict(size=18, color=node_color, line_width=1)
            )

            fig = go.Figure(data=[edge_trace, node_trace])
            fig.update_layout(title=title, showlegend=False, hovermode='closest', margin=dict(b=20,l=5,r=5,t=40))
            return fig
        except Exception as e:
            logger.error(f"Error creating interactive KG plot: {e}")
            return None

    def create_interactive_kg_plot(self, focus_node=None, depth=None):
        """
        (V2 - ENHANCED AESTHETICS)
        Create a visually appealing and informative interactive Plotly visualization of the knowledge graph.

        Args:
            focus_node (str, optional): Node ID to center the view on. Defaults to None (full graph).
            depth (int, optional): Number of hops around focus_node to include. Any value that is
                                   not a positive int falls back to 2.

        Returns:
            plotly.graph_objects.Figure: The interactive figure, or None when focus_node is not in
            the graph or there are no nodes to draw.
        """
        print(f"🎨 create_interactive_kg_plot (V2) called with focus_node={focus_node}, depth={depth}")

        target_graph = self.G
        title = "Interactive Greenhouse Knowledge Graph"

        if focus_node:
            if focus_node not in self.G:
                print(f"❌ Focus node '{focus_node}' not found in graph")
                return None
            if depth is None or not isinstance(depth, int) or depth <= 0:
                depth = 2 # Default depth

            # undirected=True makes the neighbourhood include nodes that point *to* the focus
            # node as well as nodes it points to, matching the successor + predecessor walk
            # used by the other graph views in this class.
            neighbourhood = nx.ego_graph(self.G, focus_node, radius=depth, undirected=True)
            target_graph = self.G.subgraph(set(neighbourhood.nodes()))
            title = f"Interactive KG around '{focus_node}' (Depth {depth})"

        if not target_graph.nodes():
            print("❌ No nodes to visualize")
            return None

        pos = nx.kamada_kawai_layout(target_graph)

        # --- EDGES ---
        # One trace per edge so each edge can carry its own color and hover text.
        edge_traces = []
        for source, target, attrs in target_graph.edges(data=True):
            x0, y0 = pos[source]
            x1, y1 = pos[target]

            relationship = attrs.get('relationship', '')
            color = 'rgba(0,255,0,0.7)' if relationship == '+' else 'rgba(255,0,0,0.7)' if relationship == '-' else 'rgba(128,128,128,0.5)'

            # The midpoint is added as a third point of the line trace
            edge_trace = go.Scatter(
                x=[x0, (x0+x1)/2, x1], y=[y0, (y0+y1)/2, y1],
                line=dict(width=1.5, color=color, shape='spline'),
                hoverinfo='text',
                text=f"<b>{source} → {target}</b><br>Relationship: {relationship}",
                mode='lines',
                showlegend=False
            )
            edge_traces.append(edge_trace)

        # --- NODES ---
        node_x, node_y, node_text, node_label, node_color, node_size, node_symbol = [], [], [], [], [], [], []

        color_map = {
            "State": "#1f77b4", "Process": "#2ca02c", "Flux": "#9467bd",
            "Control": "#d62728", "Constraint": "#ff7f0e", "Disturbance": "#e377c2",
            "Parameter": "#7f7f7f", "Reference": "#bcbd22", "LagrangianIneq": "#8c564b",
            "LagrangianEq": "#17becf", "Default": "#7f7f7f"
        }
        symbol_map = {
            "State": "circle", "Control": "diamond", "Disturbance": "star",
            "Flux": "triangle-up", "Process": "triangle-down", "Reference": "square",
            "Default": "circle"
        }

        for node, data in target_graph.nodes(data=True):
            x, y = pos[node]
            node_x.append(x)
            node_y.append(y)

            node_type = data.get('type', 'Default')
            node_name = data.get('name', node)

            hover_text = f"<b>{node}</b><br>Name: {node_name}<br>Type: {node_type}"
            if node in self.node_to_column:
                hover_text += f"<br>Data Column: {self.node_to_column[node]}"

            node_text.append(hover_text)
            # Marker label: the node ID up to its first underscore
            node_label.append(str(node).split('_')[0])
            node_color.append(color_map.get(node_type, color_map['Default']))
            node_size.append(40 if node_type == 'State' else 30)
            node_symbol.append(symbol_map.get(node_type, symbol_map['Default']))

        node_trace = go.Scatter(
            x=node_x, y=node_y,
            mode='markers+text',
            hoverinfo='text',
            hovertext=node_text,
            text=node_label,
            textposition="middle center",
            textfont=dict(size=10, color='white'),
            marker=dict(
                showscale=False,
                color=node_color,
                size=node_size,
                symbol=node_symbol,
                line=dict(width=2, color='white')
            ),
            showlegend=False
        )

        # --- FIGURE ---
        # The title font is set through layout.title.font; the older flat `titlefont`
        # attribute is deprecated and rejected by recent Plotly releases.
        fig = go.Figure(data=edge_traces + [node_trace],
                        layout=go.Layout(
                            title=dict(text=title, font=dict(size=16)),
                            showlegend=True,
                            hovermode='closest',
                            margin=dict(b=20, l=5, r=5, t=40),
                            annotations=[dict(
                                showarrow=False,
                                xref="paper", yref="paper",
                                x=0.005, y=-0.002,
                                text="Green edges are positive relationships, red are negative.")],
                            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                            plot_bgcolor='#111111',
                            paper_bgcolor='#111111',
                            font=dict(color='white')
                        ))

        # Add legend manually: one empty trace per node type present, since the real
        # nodes all live in a single trace that is hidden from the legend.
        present_types = sorted({data.get('type', 'Default') for _, data in target_graph.nodes(data=True)})
        for node_type in present_types:
            fig.add_trace(go.Scatter(
                x=[None], y=[None], mode='markers',
                marker=dict(
                    size=10,
                    color=color_map.get(node_type, color_map['Default']),
                    symbol=symbol_map.get(node_type, symbol_map['Default'])
                ),
                name=node_type,
                showlegend=True
            ))
        fig.update_layout(legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01, bgcolor='rgba(0,0,0,0.5)'))

        print("✅ Enhanced interactive KG plot created successfully")
        return fig

    def get_related_nodes_interactive(self, selected_node):
        """
        Build Plotly-ready data for a clicked node and its direct neighbours.

        Args:
            selected_node (str): The node that was clicked.

        Returns:
            dict: Node lists ('node_x', 'node_y', 'node_text', 'node_color',
            'node_size', 'node_hover'), edge lists ('edge_x', 'edge_y',
            'edge_colors', 'edge_hover') and a 'title' for the 1-hop subgraph,
            or None when selected_node is not in the graph.
        """
        graph = self.G
        if selected_node not in graph:
            return None

        # 1-hop neighbourhood in both edge directions, plus the clicked node itself
        one_hop = {selected_node}
        one_hop.update(graph.successors(selected_node))
        one_hop.update(graph.predecessors(selected_node))

        local_view = graph.subgraph(list(one_hop))
        layout = nx.spring_layout(local_view, k=0.8, iterations=50)

        type_colors = {
            "State": "#87CEEB",
            "Process": "#90EE90",
            "Flux": "#98FB98",
            "Control": "#F08080",
            "Constraint": "#FFA07A",
            "Disturbance": "#FFD700",
            "Parameter": "#D3D3D3",
            "Reference": "#DA70D6",
            "LagrangianIneq": "#FFA500",
            "LagrangianEq": "#FFEC8B",
            "TimeSeriesData": "#A9A9A9",
            "Default": "#E0E0E0"
        }
        fallback_color = type_colors['Default']

        # --- node data: one entry per node, all lists stay index-aligned ---
        xs, ys, labels, fills, sizes, hovers = [], [], [], [], [], []
        for node_id in local_view.nodes():
            px, py = layout[node_id]
            xs.append(px)
            ys.append(py)

            attrs = local_view.nodes[node_id]
            display_name = attrs.get('name', node_id)
            kind = attrs.get('type', 'Default')

            hover_parts = [
                f"<b>{node_id}</b><br>",
                f"Name: {display_name}<br>",
                f"Type: {kind}<br>",
            ]
            if node_id in self.node_to_column:
                hover_parts.append(f"Data Column: {self.node_to_column[node_id]}<br>")

            # The clicked node is drawn largest, then State nodes, then everything else
            if node_id == selected_node:
                marker_size = 35
            elif kind == 'State':
                marker_size = 30
            else:
                marker_size = 20

            labels.append(node_id.split('_')[0])
            fills.append(type_colors.get(kind, fallback_color))
            sizes.append(marker_size)
            hovers.append("".join(hover_parts))

        # --- edge data: coordinates as None-separated segments, one color/hover per edge ---
        line_x, line_y, line_colors, line_hovers = [], [], [], []
        for src, dst in local_view.edges():
            sx, sy = layout[src]
            dx, dy = layout[dst]
            line_x += [sx, dx, None]
            line_y += [sy, dy, None]

            relationship = local_view.edges[src, dst].get('relationship', '')
            line_colors.append(
                'green' if relationship == '+' else ('red' if relationship == '-' else 'gray')
            )
            line_hovers.append(f"<b>{src} → {dst}</b><br>Relationship: {relationship}")

        payload = {
            'node_x': xs,
            'node_y': ys,
            'node_text': labels,
            'node_color': fills,
            'node_size': sizes,
            'node_hover': hovers,
            'edge_x': line_x,
            'edge_y': line_y,
            'edge_colors': line_colors,
            'edge_hover': line_hovers,
            'title': f"Related Nodes for '{selected_node}'"
        }
        return payload


# Module-level singletons, created when this module is imported; the chat
# callbacks below read `qa_system` directly.
kg = KnowledgeGraph()
# NOTE(review): the second argument looks like the path to the greenhouse CSV
# data, written relative to the working directory — confirm against the constructor.
qa_system = EnhancedGreenhouseKnowledgeGraph(kg, "./greenhouse_usecase/data/filtered_dates.csv")


def create_interactive_kg_plot_global(system: EnhancedGreenhouseKnowledgeGraph, focus_node: Optional[str] = None, depth: Optional[int] = None):
    """Build an interactive KG figure for `system`, whichever way it supports.

    When `system` has a callable `create_interactive_kg_plot`, that method is
    used (with `depth` defaulting to 2 when None). Otherwise a basic
    networkx + Plotly figure is assembled here from `system.G`. Any exception
    is logged and turned into a None return.
    """
    try:
        # Preferred path: delegate to the system's own plotting method
        native_plot = getattr(system, 'create_interactive_kg_plot', None)
        if callable(native_plot):
            effective_depth = 2 if depth is None else depth
            return native_plot(focus_node=focus_node, depth=effective_depth)

        # Best-effort fallback: draw the graph (or a neighbourhood of it) directly
        full_graph = system.G
        view = full_graph
        if focus_node:
            if hasattr(system.kg, 'get_related_nodes_interactive'):
                hops = depth if depth else 2
                keep = set(system.kg.get_related_nodes_interactive(focus_node, hops))
            else:
                keep = {focus_node}
            view = full_graph.subgraph(keep).copy()

        layout = nx.spring_layout(view, seed=42)

        # Edge segments, separated by None so they render as individual lines
        line_x, line_y = [], []
        for src, dst in view.edges():
            sx, sy = layout[src]
            dx, dy = layout[dst]
            line_x.extend([sx, dx, None])
            line_y.extend([sy, dy, None])

        edge_trace = go.Scatter(x=line_x, y=line_y, mode='lines', line=dict(width=1, color='#888'))

        # Node types missing from this palette fall back to the 'State' blue
        type_palette = {'Control': '#d62728', 'Flux': '#9467bd', 'Disturbance': '#17becf', 'State': '#1f77b4'}
        marker_x, marker_y, names, fills = [], [], [], []
        for node_id, attrs in view.nodes(data=True):
            px, py = layout[node_id]
            marker_x.append(px)
            marker_y.append(py)
            names.append(attrs.get('name', node_id))
            fills.append(type_palette.get(attrs.get('type', 'Default'), '#1f77b4'))

        short_names = [name.split(' (')[0] for name in names]
        node_trace = go.Scatter(x=marker_x, y=marker_y, mode='markers+text', text=short_names, textposition='top center', marker=dict(size=18, color=fills))

        focus_suffix = f" (focus={focus_node})" if focus_node else ''
        fig = go.Figure(data=[edge_trace, node_trace])
        fig.update_layout(title='Interactive KG' + focus_suffix, showlegend=False)
        return fig
    except Exception as e:
        logger.error(f"create_interactive_kg_plot_global error: {e}")
        return None


def create_chat_message(message: str, is_user: bool = False, has_plot: bool = False) -> Dict[str, str]:
    """Wrap `message` as a role/content dict for the Gradio messages format.

    Assistant messages that come with a plot get a short visualization
    notice appended; user messages are passed through unchanged.
    """
    if is_user:
        return {"role": "user", "content": message}

    content = message
    if has_plot:
        content = content + "\n\n📊 Visualization generated below"
    return {"role": "assistant", "content": content}

def greenhouse_chat_interface(message: str, history: List[Dict[str, str]]):
    """Enhanced chat interface with integrated visualization support.

    Args:
        message: The text submitted by the user. None, empty, or
            whitespace-only input is ignored.
        history: Chat history as a list of message dicts; it is extended in
            place with the user message and the assistant reply.

    Returns:
        A 3-tuple ``(history, plot, "")``: the updated history, the plot to
        show (or None), and an empty string.

    NOTE(review): this function is defined again further down in this module;
    the later definition replaces this one when the module is imported.
    """
    if not message or not message.strip():
        # Nothing to answer: clear the plot and leave the history untouched
        return history, None, ""

    # Add user message to history
    user_msg = create_chat_message(message, is_user=True)
    history.append(user_msg)

    try:
        # Get response from the QA system
        response_data = qa_system.answer_query(message)

        if isinstance(response_data, tuple) and len(response_data) == 2:
            chat_message, plot_fig = response_data
        elif isinstance(response_data, str):
            # Text-only answer: keep showing whatever plot the session currently holds
            chat_message = response_data
            plot_fig = plot_session.get_current_plot()
        else:
            chat_message = "I apologize, but I encountered an issue processing your request. Please try again."
            plot_fig = plot_session.get_current_plot()

        # Add bot message to history
        has_plot = plot_fig is not None
        bot_msg = create_chat_message(chat_message, is_user=False, has_plot=has_plot)
        history.append(bot_msg)

        # Return updated history, plot, and clear input
        return history, plot_fig, ""

    except Exception as e:
        logger.exception("greenhouse_chat_interface failed while answering a message")
        error_msg = f"I encountered an error: {str(e)}. Please try rephrasing your question."
        # The user message is already in the history, so only the assistant's
        # error reply is appended, in the same dict format as every other entry.
        history.append(create_chat_message(error_msg, is_user=False))
        return history, None, ""

# Custom CSS for better styling and theme support, kept as a single string.
# Layout of the stylesheet: color variables are declared on :root (light theme)
# and overridden under the .dark class; most component rules below read their
# colors through var(--...), so one set of rules serves both themes.
# --accent-color-translucent is referenced with an rgba() fallback but is not
# defined anywhere in this string, so the fallback value is what applies here.
# NOTE(review): presumably handed to the Gradio app as its custom CSS — the
# consumer is not visible in this part of the file.
custom_css = """
/* --- Modern Theme --- */

/* Light Theme (Default) */
:root {
    --bg-primary: #F7F9FC;
    --bg-secondary: #FFFFFF;
    --bg-tertiary: #E9EDF5;
    --text-primary: #1F2937;
    --text-secondary: #6B7280;
    --border-color: #D1D5DB;
    --accent-color: #3B82F6;
    --success-color: #10B981;
    --info-color: #3B82F6;
    --shadow: rgba(0, 0, 0, 0.05);
    --font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
}

/* Dark Theme */
.dark {
    --bg-primary: #111827 !important;
    --bg-secondary: #1F2937 !important;
    --bg-tertiary: #374151 !important;
    --text-primary: #F9FAFB !important;
    --text-secondary: #9CA3AF !important;
    --border-color: #4B5563 !important;
    --accent-color: #60A5FA !important;
    --success-color: #34D399 !important;
    --info-color: #60A5FA !important;
    --shadow: rgba(0, 0, 0, 0.2) !important;
}

body, .gradio-container {
    font-family: var(--font-family) !important;
    background-color: var(--bg-primary) !important;
    color: var(--text-primary) !important;
}

/* Header */
.header-container {
    background: linear-gradient(135deg, var(--accent-color), var(--success-color));
    padding: 2rem;
    border-radius: 1rem;
    margin-bottom: 1.5rem;
    box-shadow: 0 10px 15px -3px var(--shadow), 0 4px 6px -2px var(--shadow);
}
.header-title {
    color: white;
    text-align: center;
    font-size: 2.5rem;
    font-weight: 700;
}
.header-subtitle {
    color: rgba(255, 255, 255, 0.9);
    text-align: center;
    font-size: 1.1rem;
    margin-top: 0.5rem;
}

/* Chat Interface */
.chat-container {
    background: var(--bg-secondary);
    border-radius: 1rem;
    padding: 1rem;
    box-shadow: 0 4px 6px -1px var(--shadow);
    border: 1px solid var(--border-color);
}

/* Message Bubbles */
.user-message, .bot-message {
    padding: 1rem;
    margin: 0.5rem 0;
    border-radius: 1rem;
    box-shadow: 0 1px 3px 0 var(--shadow);
    max-width: 80%;
}
.user-message {
    background: var(--accent-color);
    color: white;
    border-bottom-right-radius: 0.25rem;
    margin-left: auto;
}
.bot-message {
    background: var(--bg-tertiary);
    color: var(--text-primary);
    border-bottom-left-radius: 0.25rem;
    margin-right: auto;
}

/* Input Area */
.input-container {
    background: var(--bg-secondary);
    border-radius: 1.5rem;
    padding: 0.5rem 1rem;
    box-shadow: 0 4px 6px -1px var(--shadow);
    border: 1px solid var(--border-color);
    transition: border-color 0.2s ease, box-shadow 0.2s ease;
}
.input-container:focus-within {
    border-color: var(--accent-color);
    box-shadow: 0 0 0 3px var(--accent-color-translucent, rgba(59, 130, 246, 0.2));
}

/* Buttons */
.quick-btn, .gradio-button {
    background: var(--bg-tertiary);
    border: 1px solid var(--border-color);
    border-radius: 0.75rem;
    padding: 0.5rem 1rem;
    font-size: 0.9rem;
    font-weight: 500;
    color: var(--text-primary);
    cursor: pointer;
    transition: all 0.2s ease;
}
.quick-btn:hover, .gradio-button:hover {
    background: var(--accent-color);
    color: white;
    transform: translateY(-2px);
    box-shadow: 0 4px 6px -1px var(--shadow);
}
.gradio-button.primary {
    background: var(--accent-color);
    color: white;
    border-color: var(--accent-color);
}

/* Send Button Centering */
.send-button {
    align-self: center;
}

/* Visualization Panel */
.viz-container {
    background: var(--bg-secondary);
    border-radius: 1rem;
    padding: 1rem;
    margin-top: 1rem;
    box-shadow: 0 4px 6px -1px var(--shadow);
    border: 1px solid var(--border-color);
}

/* Accordion for controls */
.gradio-accordion {
    background: var(--bg-secondary);
    border-radius: 1rem;
    border: 1px solid var(--border-color);
}

/* Status Indicator */
.status-indicator {
    display: inline-block;
    width: 10px;
    height: 10px;
    border-radius: 50%;
    margin-right: 0.5rem;
    background: var(--success-color);
    animation: pulse 2s infinite;
}
@keyframes pulse {
    0% { box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7); }
    70% { box-shadow: 0 0 0 10px rgba(16, 185, 129, 0); }
    100% { box-shadow: 0 0 0 0 rgba(16, 185, 129, 0); }
}
"""

def create_chat_message(message: str, is_user: bool = False, has_plot: bool = False) -> Dict[str, str]:
    """Build one chatbot history entry for `message`.

    The entry carries the speaker role, the unmodified message text, and a
    metadata dict recording whether a plot accompanies the message and the
    creation time as an ISO-8601 string.
    """
    speaker = "user" if is_user else "assistant"
    entry = {"role": speaker, "content": message}
    entry["metadata"] = {
        "has_plot": has_plot,
        "timestamp": datetime.now().isoformat(),
    }
    return entry

def greenhouse_chat_interface(message: str, history: List[Dict[str, str]]):
    """Answer one user message and return the updated chat state.

    The user message and the assistant reply are appended to `history` in
    place. The return value is always the 3-tuple ``(history, plot, "")``,
    where `plot` is the figure produced by the QA system or None. Blank input
    returns the history untouched. Any exception is printed with its
    traceback and answered with a generic apology message.
    """
    try:
        print(f"📨 Processing message: '{message}'")

        # Blank input: nothing to add, nothing to plot
        is_blank = (not message) or message.strip() == ""
        if is_blank:
            return history, None, ""

        # Record what the user said
        history.append(create_chat_message(message, is_user=True, has_plot=False))

        # Ask the QA system
        print("🤖 Calling qa_system.answer_query()...")
        response = qa_system.answer_query(message)

        # A 2-tuple is (text, plot); anything else is treated as text only
        if isinstance(response, tuple) and len(response) == 2:
            text_response, plot_fig = response
        elif isinstance(response, str):
            text_response, plot_fig = response, None
        else:
            text_response, plot_fig = str(response), None
        has_plot = plot_fig is not None

        print(f"📝 Response received: {len(text_response)} chars, plot: {has_plot}")

        # Record the assistant's reply
        history.append(create_chat_message(text_response, is_user=False, has_plot=has_plot))

        # Keep the newest plot available through the plot session
        if has_plot:
            plot_session.set_plot(plot_fig, {'type': 'data_visualization', 'query': message})
            print("💾 Plot stored in session")

        print("✅ Message processed successfully")
        return history, plot_fig, ""

    except Exception as e:
        print(f"❌ Error in chat interface: {e}")
        import traceback
        traceback.print_exc()

        # Tell the user something went wrong without exposing the error itself
        apology = create_chat_message(
            "I apologize, but I encountered an error while processing your message. Please try again.",
            is_user=False,
            has_plot=False
        )
        history.append(apology)

        return history, None, ""

def handle_quick_action(action: str, history: List[Dict[str, str]]):
    """Route a quick-action button press through the regular chat pipeline.

    The action string is handled exactly like a message typed by the user,
    and the chat interface's result is returned unchanged.
    """
    chat_result = greenhouse_chat_interface(action, history)
    return chat_result

def handle_kg_visualization(history: List[Dict[str, str]]):
    """Handle the knowledge-graph button: show a view focused on the conversation context.

    The chat history is scanned from the newest message backwards. Each user
    message is passed to ``qa_system.parse_query_nlp``; the first one that
    yields a ``primary_node`` or non-empty ``target_nodes`` decides the focus.
    If no message yields a node, the complete graph is drawn instead.

    Args:
        history: Chat history (list of message dicts with ``role`` and
            ``content``). A bot message describing the outcome is appended
            in place.

    Returns:
        A ``(history, plot, "")`` tuple. ``plot`` is the value returned by
        ``create_interactive_kg_plot_global`` or ``None`` when the plot could
        not be produced or an error occurred.
    """
    try:
        print("🔍 KG Button clicked - Starting focused visualization...")

        focus_nodes = []
        focus_node = None
        depth = 2

        # Newest messages first: the most recent question sets the context.
        for message in reversed(history):
            if message.get('role') != 'user':
                continue

            user_query = message.get('content', '')
            # Only plain, non-empty text can be parsed as a query.
            if not isinstance(user_query, str) or not user_query.strip():
                continue

            try:
                resolved_info = qa_system.parse_query_nlp(user_query)
                target_nodes = resolved_info.get('target_nodes', [])
                primary_node = resolved_info.get('primary_node')

                if target_nodes:
                    focus_nodes = target_nodes[:3]  # Limit to top 3 nodes
                if primary_node:
                    focus_node = primary_node
            except Exception as e:
                # A message that cannot be parsed must not block older ones.
                print(f"⚠️ Failed to parse query for KG context: {e}")
                continue

            if focus_nodes or focus_node:
                print(f"📍 Found relevant nodes from conversation: focus_node={focus_node}, focus_nodes={focus_nodes}")
                break

        has_focus = bool(focus_node or focus_nodes)

        # Generate the interactive KG plot - focused or complete
        print("📊 Creating interactive KG plot...")
        if has_focus:
            # The primary node wins; otherwise fall back to the first target node.
            center_node = focus_node or focus_nodes[0]
            kg_plot = create_interactive_kg_plot_global(qa_system, focus_node=center_node, depth=depth)
            context_desc = f"focused on '{center_node}'"
        else:
            # No usable context in the conversation: show everything.
            kg_plot = create_interactive_kg_plot_global(qa_system)
            context_desc = "complete view"

        if kg_plot is None:
            print("❌ KG plot creation returned None")
            bot_msg = create_chat_message(
                "I apologize, but I couldn't generate the knowledge graph visualization at this time.",
                is_user=False,
                has_plot=False
            )
            history.append(bot_msg)
            return history, None, ""

        print("✅ KG plot created successfully")
        print(f"   Plot type: {type(kg_plot)}")
        print(f"   Context: {context_desc}")

        # Store the plot in the session
        plot_session.set_plot(kg_plot, {
            'type': 'knowledge_graph',
            'interactive': True,
            'focus_node': focus_node,
            'focus_nodes': focus_nodes,
            'context': context_desc
        })
        print("💾 Plot stored in session")

        # Pick the reply text that matches the view that was drawn.
        if has_focus:
            reply_text = (
                "Here's the Knowledge Graph focused on the topic from your recent question! This shows the relationships and connections related to your query. Click on any node to explore further connections."
            )
        else:
            reply_text = (
                "Here's the interactive Knowledge Graph for your greenhouse system! Click on any node to see only its related connections. The graph shows all the variables, controls, and relationships in your greenhouse climate control system."
            )
        bot_msg = create_chat_message(reply_text, is_user=False, has_plot=True)

        history.append(bot_msg)
        print("📝 Message added to history")

        print("🎉 Returning from KG handler")
        return history, kg_plot, ""

    except Exception as e:
        print(f"❌ Error in KG visualization: {e}")
        traceback.print_exc()

        bot_msg = create_chat_message(
            "I encountered an error while generating the knowledge graph visualization. Please try again.",
            is_user=False,
            has_plot=False
        )
        history.append(bot_msg)
        return history, None, ""


def handle_kg_full_visualization(history: List[Dict[str, str]]):
    """Respond to the full knowledge-graph button by drawing the complete graph.

    Args:
        history: Chat history; a bot message describing the outcome is
            appended in place.

    Returns:
        A ``(history, plot, "")`` tuple, where ``plot`` is ``None`` if the
        graph could not be created or an error occurred.
    """
    try:
        print("🔍 Full KG Button clicked - Starting complete visualization...")
        print("📊 Creating complete interactive KG plot...")
        full_graph = create_interactive_kg_plot_global(qa_system)

        if full_graph is not None:
            print("✅ Complete KG plot created successfully")
            print(f"   Plot type: {type(full_graph)}")

            # Remember the figure as the session's active plot.
            session_meta = {'type': 'knowledge_graph_full', 'interactive': True}
            plot_session.set_plot(full_graph, session_meta)
            print("💾 Plot stored in session")

            success_text = "Here's the complete interactive Knowledge Graph for your greenhouse system! Click on any node to see only its related connections. The graph shows all the variables, controls, and relationships in your greenhouse climate control system."
            history.append(create_chat_message(success_text, is_user=False, has_plot=True))
            print("📝 Message added to history")

            print("🎉 Returning from full KG handler")
            return history, full_graph, ""

        # The plot builder produced nothing: tell the user and show no plot.
        print("❌ KG plot creation returned None")
        failure_text = "I apologize, but I couldn't generate the knowledge graph visualization at this time."
        history.append(create_chat_message(failure_text, is_user=False, has_plot=False))
        return history, None, ""

    except Exception as exc:
        print(f"❌ Error in full KG visualization: {exc}")
        traceback.print_exc()

        error_text = "I encountered an error while generating the knowledge graph visualization. Please try again."
        history.append(create_chat_message(error_text, is_user=False, has_plot=False))
        return history, None, ""

def handle_kg_filter(selected_node: str, depth: int, history: List[Dict[str, str]]):
    """Draw the knowledge graph restricted to the neighbourhood of one node.

    Args:
        selected_node: Node to centre the view on; a falsy value produces a
            prompt asking the user to pick a node.
        depth: Passed to the plot builder as ``depth`` and echoed in the reply.
        history: Chat history; a bot message is appended in place.

    Returns:
        A ``(history, plot, "")`` tuple, with ``plot`` set to ``None`` whenever
        no filtered graph is shown.
    """

    def _reply(text, plot=None):
        # Append a bot message and build the handler's return tuple.
        history.append(create_chat_message(text, is_user=False, has_plot=plot is not None))
        return history, plot, ""

    if not selected_node:
        return _reply("Please select a node from the dropdown to filter the knowledge graph.")

    try:
        filtered_plot = create_interactive_kg_plot_global(qa_system, focus_node=selected_node, depth=depth)

        if filtered_plot is None:
            return _reply(
                f"I couldn't find the node '{selected_node}' in the knowledge graph. Please try selecting a different node."
            )

        # Remember the figure as the session's active plot.
        session_meta = {
            'type': 'knowledge_graph_filtered',
            'focus_node': selected_node,
            'depth': depth,
            'interactive': True
        }
        plot_session.set_plot(filtered_plot, session_meta)

        return _reply(
            f"Here's the knowledge graph filtered to show connections related to '{selected_node}' (depth: {depth}). This shows all variables and relationships connected to your selected node within {depth} connection{'s' if depth > 1 else ''}.",
            plot=filtered_plot
        )

    except Exception as exc:
        logger.error(f"Error filtering KG visualization: {exc}")
        return _reply("I encountered an error while filtering the knowledge graph. Please try again.")

def clear_plot_session():
    """Drop the plot held by the shared plot session.

    Returns:
        Always ``None``, so the result can be used directly as the new value
        of a plot output.
    """
    plot_session.clear_plot()
    cleared_plot = None
    return cleared_plot

def clear_entire_session():
    """Reset the plot session and the QA system's dialogue state.

    Returns:
        ``([], None)``: an empty chat history and no plot.
    """
    plot_session.clear_session()
    # The QA system keeps its own dialogue state, which is reset as well.
    qa_system.clear_dialogue_state()

    empty_history = []
    return empty_history, None

def greenhouse_chat_interface(message: str, history: List[Dict[str, str]]):
    """Main chat interface function: process a user message and return the reply and plot.

    Args:
        message: Text typed (or injected by a quick action) by the user.
            Empty or whitespace-only messages are ignored.
        history: Chat history; the user message and the bot reply are
            appended in place.

    Returns:
        A ``(history, plot, "")`` tuple. ``plot`` is the figure returned by the
        QA system, or ``None`` when there is none or an error occurred.
    """
    try:
        print(f"📨 Processing message: '{message}'")

        # Skip empty messages
        if not message or message.strip() == "":
            return history, None, ""

        # Add user message to history
        user_msg = create_chat_message(message, is_user=True, has_plot=False)
        history.append(user_msg)

        # Process the query using the QA system
        print("🤖 Calling qa_system.answer_query()...")
        response = qa_system.answer_query(message)

        # The QA system answers with either (text, plot) or just text.
        if isinstance(response, tuple) and len(response) == 2:
            text_response, plot_fig = response
        else:
            text_response, plot_fig = response, None

        # Check for None *before* converting to str: str(None) would turn a
        # missing answer into the literal text "None".
        if text_response is None:
            text_response = "Unable to generate response."
        elif not isinstance(text_response, str):
            text_response = str(text_response)
        has_plot = plot_fig is not None

        print(f"📝 Response received: {len(text_response)} chars, plot: {has_plot}")

        # Create bot message
        bot_msg = create_chat_message(text_response, is_user=False, has_plot=has_plot)
        history.append(bot_msg)

        # Store plot in session if we have one
        if has_plot:
            plot_session.set_plot(plot_fig, {'type': 'data_visualization', 'query': message})
            print("💾 Plot stored in session")

        print("✅ Message processed successfully")
        return history, plot_fig, ""

    except Exception as e:
        print(f"❌ Error in chat interface: {e}")
        traceback.print_exc()

        # Add error message to history
        error_msg = create_chat_message(
            "I apologize, but I encountered an error while processing your message. Please try again.",
            is_user=False,
            has_plot=False
        )
        history.append(error_msg)

        return history, None, ""

# Create the main interface
def create_greenhouse_interface():
    """Build the Gradio Blocks UI for the greenhouse assistant.

    The layout contains a header with a theme toggle, the chat window with a
    text box and send button, six quick-action buttons, a plot panel, a clear
    button and a collapsible help section. All chat-style events write to the
    chatbot, the plot panel and the text box.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(
            css=custom_css,
            title="🌿 Greenhouse AI Assistant",
            theme=gr.themes.Base(),  # Use base theme and let CSS handle theming
            head='''
            <script>
                function toggleTheme() {
                    document.body.classList.toggle('dark');
                    const themeToggle = document.getElementById('theme-toggle');
                    if (document.body.classList.contains('dark')) {
                        themeToggle.innerHTML = '☀️';
                    } else {
                        themeToggle.innerHTML = '🌙';
                    }
                }
            </script>
            ''',
            #favicon_path="./assets/favicon.gif"  # Optional: add your own favicon
    ) as demo:

        # Header
        with gr.Row():
            gr.HTML("""
                <div class="header-container">
                    <div class="header-left">
                        <h1 class="header-title">🌿 Greenhouse AI Assistant 🌿</h1>
                        <p class="header-subtitle">
                            <span class="status-indicator status-online"></span>
                            Your intelligent greenhouse monitoring companion
                        </p>
                    </div>
                    <div class="header-right">
                        <button id="theme-toggle" class="theme-toggle-btn" onclick="toggleTheme()" title="Toggle theme">
                            🌙
                        </button>
                    </div>
                </div>
            """)

        # Main chat interface
        with gr.Row():
            with gr.Column(scale=4):
                # Chat history with custom styling
                chatbot = gr.Chatbot(
                    label="💬 Conversation",
                    height=400,
                    elem_classes=["chat-container"],
                    avatar_images=None,  # Optional: specify custom avatar images
                    type='messages'
                )

                # Input area
                with gr.Row():
                    msg = gr.Textbox(
                        placeholder="Ask me about your greenhouse... (e.g., 'Show me today's temperature')",
                        label="",
                        lines=1,
                        scale=4,
                        elem_classes=["input-container"]
                    )
                    send_btn = gr.Button("🌿 Send", scale=1, variant="primary", elem_classes=["send-button"])

                # Quick action buttons
                gr.HTML('<div style="margin: 1rem 0; font-weight: bold; color: var(--text-secondary);">Quick Actions:</div>')

                with gr.Row():
                    temp_btn = gr.Button("🌡️ Current Temperature", size="sm")
                    humid_btn = gr.Button("💧 Humidity Now", size="sm")
                    co2_btn = gr.Button("🌱 CO2 Levels", size="sm")

                with gr.Row():
                    viz_temp_btn = gr.Button("📊 Temperature Chart", size="sm")
                    viz_humid_btn = gr.Button("📈 Humidity Trends", size="sm")
                    pattern_btn = gr.Button("🔍 Daily Patterns", size="sm")

        # Visualization panel (integrated)
        with gr.Row():
            with gr.Column():
                plot_display = gr.Plot(
                    label="📊 Data Visualization",
                    elem_classes=["viz-container"]
                )

        # Clear button
        with gr.Row():
            clear_btn = gr.Button("🗑️ Clear Conversation", variant="secondary", size="sm")

        # Help section
        with gr.Accordion("❓ How to Use", open=False):
            gr.Markdown("""
            ### 🎯 What I can help you with:
            
            **📊 Visualizations:**
            - "Show me the temperature chart for at Aug 24,2011?"
            - "Visualize CO2 levels on Aug 24,2011?"
            - "Plot humidity trends on Aug 25,2011?"
            - "Show me the knowledge graph" (or use the 🕸️ button)
            
            **📈 Data Queries:**
            - "What's the current temperature at Aug 24,2011?"
            - "What was the humidity at 10:00 AM?"
            - "Show me Aug 25,2011's patterns"
            
            **🔍 Analysis:**
            - "Why was ventilation turned on?"
            - "Is temperature related to humidity?"
            - "Explain the temperature drop at 6 PM"
            
            
            **💡 Pro Tips:**
            - Use natural language - I understand context!
            - Ask follow-up questions for detailed analysis
            - Try the quick action buttons for common queries
            - Use the Knowledge Graph controls to explore relationships interactively
            """)

        # Event handlers
        def send_message(message, history):
            return greenhouse_chat_interface(message, history)

        def make_quick_action(prompt):
            # A factory gives every button its own prompt; a lambda defined
            # directly inside the loop below would see only the last prompt.
            def run_quick_action(history):
                return handle_quick_action(prompt, history)
            return run_quick_action

        chat_outputs = [chatbot, plot_display, msg]

        # Message sending: pressing Enter and clicking Send behave identically.
        for register_send in (msg.submit, send_btn.click):
            register_send(
                fn=send_message,
                inputs=[msg, chatbot],
                outputs=chat_outputs,
                queue=False
            )

        # Quick action handlers: each button submits a canned query.
        quick_actions = [
            (temp_btn, "What is the temperature at 10:00 am on Aug 24,2011?"),
            (humid_btn, "What is the humidity at 17:45 on Aug 24,2011??"),
            (co2_btn, "What are the CO2 levels at 07:30 on Aug 24,2011??"),
            (viz_temp_btn, "visualize the plot for temperature"),
            (viz_humid_btn, "show me the humidity chart"),
            (pattern_btn, "Was August 24th a good growing day? The temperature hit 24°C - is that too hot for optimal photosynthesis?"),
        ]
        for button, prompt in quick_actions:
            button.click(
                fn=make_quick_action(prompt),
                inputs=[chatbot],
                outputs=chat_outputs
            )

        # "Clear Conversation" must empty the chat and reset the dialogue
        # state, not only the plot; clear_entire_session returns ([], None).
        clear_btn.click(
            fn=clear_entire_session,
            inputs=None,
            outputs=[chatbot, plot_display],
            queue=False
        )

    return demo

def setup_ngrok(port: Optional[int] = None):
    """Open an ngrok HTTP tunnel to the local Gradio server.

    Args:
        port: Local port to expose. When omitted, the port is read from the
            ``GRADIO_SERVER_PORT`` environment variable (default 7860), the
            same lookup the launcher uses for ``server_port``, so the tunnel
            and the server cannot drift apart.

    Returns:
        The tunnel's public URL, or ``None`` if ngrok could not be set up.
    """
    # SECURITY(review): an auth token committed to source control should be
    # treated as leaked. Rotate it and supply the new one via NGROK_AUTHTOKEN.
    # The literal is kept only as a fallback so existing setups keep working.
    auth_token = os.getenv(
        "NGROK_AUTHTOKEN",
        "2xwxFDqqAkgpP5BIlpp1EMBN6yH_6WQyzPHLnAbMy8pL2Ejhj",
    )

    try:
        if port is None:
            port = int(os.getenv('GRADIO_SERVER_PORT', 7860))

        ngrok.set_auth_token(auth_token)

        # Kill any existing ngrok processes
        ngrok.kill()

        # Create a tunnel to the local server port
        tunnel = ngrok.connect(str(port), "http")
        public_url = tunnel.public_url
        print(f"🌐 Public URL: {public_url}")
        print("🔒 Login with username: acsd, password: acsd")
        return public_url
    except Exception as e:
        # Any failure (bad port value, token rejected, tunnel error) means
        # there is no public URL; the caller then runs locally only.
        print(f"Error creating ngrok tunnel: {e}")
        return None

def cleanup_ngrok():
    """Clean up ngrok on exit (best effort).

    Failures are reported but never raised, so this is safe to use as an
    ``atexit`` hook and inside exception handlers.
    """
    try:
        ngrok.kill()
        print("🧹 Cleaned up ngrok tunnels")
    except Exception as e:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate; report why cleanup was skipped.
        print(f"⚠️ ngrok cleanup failed: {e}")

if __name__ == "__main__":
    print("🌿 Starting Greenhouse AI Assistant with ngrok...")

    # Register tunnel teardown first so it runs however the process exits.
    atexit.register(cleanup_ngrok)

    # Build the UI, then open the public tunnel.
    demo = create_greenhouse_interface()
    public_url = setup_ngrok()

    if not public_url:
        print("⚠️ ngrok tunnel failed, running locally only")
    else:
        # One banner telling the operator where and how to log in.
        banner = "\n".join([
            f"\n{'='*60}",
            "🎉 SUCCESS! Your app is now publicly accessible!",
            f"🌐 Public URL: {public_url}",
            "🔐 Username: acsd",
            "🔐 Password: acsd",
            f"{'='*60}\n",
        ])
        print(banner)

    try:
        # Gradio binds to localhost only; ngrok provides the public access.
        demo.launch(
            auth=("acsd", "acsd"),
            server_name="127.0.0.1",
            server_port=int(os.getenv('GRADIO_SERVER_PORT', 7860)),
            share=False,  # ngrok is used instead of Gradio's share link
            quiet=False,
            prevent_thread_lock=False,
            show_error=True
        )
    except KeyboardInterrupt:
        print("Shutting down...")
        cleanup_ngrok()
    except Exception as launch_error:
        print(f"Error launching Gradio: {launch_error}")
        cleanup_ngrok()
