import pandas as pd
import numpy as np
import casadi as ca
import glob
import matplotlib.pyplot as plt
import os
import warnings
import time
from datetime import datetime, time, date, timedelta
import dateparser
from dateparser.search import search_dates
import re
import matplotlib.dates as mdates


def robust_date_parser(query: str, results_df: pd.DataFrame):
    """Extract a date (and a clock time, when present) from a free-text query.

    An explicit "<Month> <day>[,] <year>" date is looked for first. When one
    is found it is combined with the first ``H:MM`` / ``HH:MM`` time in the
    query (if any) and handed to ``dateparser.parse``. When no explicit date
    is found, the whole query is passed to ``search_dates`` with a preference
    for past dates and the first hit's datetime is returned.

    ``results_df`` is accepted but not used by this function.

    Returns whatever the dateparser call yields, or ``None`` when
    ``search_dates`` finds nothing.
    """
    explicit_date = re.search(
        r'((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2}(?:st|nd|rd|th)?(?:,)?\s+\d{4})',
        query,
        re.IGNORECASE,
    )

    if explicit_date is None:
        # No explicit calendar date: let dateparser scan the free text.
        hits = search_dates(query, settings={'PREFER_DATES_FROM': 'past'})
        if not hits:
            return None
        return hits[0][1]

    pieces = [explicit_date.group(1)]
    clock = re.search(r'(\d{1,2}:\d{2})', query)
    if clock is not None:
        pieces.append(clock.group(1))
    return dateparser.parse(" ".join(pieces))

# Ignore every FutureWarning from here on. This runs at import time and
# installs a global filter, so it affects all code executed afterwards.
warnings.filterwarnings("ignore", category=FutureWarning)

def _find_file(folder, patterns):
    """Return the paths in ``folder`` that match any of the glob ``patterns``.

    Patterns are tried in the order given, so matches of earlier patterns come
    first. Within one pattern the matches are sorted, because ``glob.glob``
    returns them in arbitrary filesystem order. A path matched by several
    patterns is reported only once.

    Args:
        folder: Directory to search (not recursive).
        patterns: Iterable of glob patterns relative to ``folder``.

    Returns:
        A list of matching paths (``folder`` joined with the file name);
        empty when nothing matches.
    """
    # A dict keeps insertion order and drops repeated paths in one pass.
    found = {}
    for pat in patterns:
        for path in sorted(glob.glob(os.path.join(folder, pat))):
            found.setdefault(path, None)
    return list(found)


def _find_column(df, keywords):
    """Return the first column of ``df`` whose name contains any keyword.

    Matching is a case-insensitive substring test. Columns are scanned in
    DataFrame order and the first column matching *any* keyword wins, so the
    order of ``keywords`` does not express a priority. Column labels that are
    not strings (e.g. integer headers) are compared via their ``str()`` form
    instead of raising.

    Args:
        df: Object exposing ``.columns`` (a pandas DataFrame).
        keywords: Iterable of substrings to look for.

    Returns:
        The original column label of the first match, or ``None``.
    """
    lowered_keywords = [str(kw).lower() for kw in keywords]
    for col in df.columns:
        low = str(col).lower()
        if any(kw in low for kw in lowered_keywords):
            return col
    return None

def create_occupancy_schedule(index, occupancy_heat_gain_watts=100, max_occupants=50,
                              start_hour=8, end_hour=18):
    """Build a heat-gain series for a fixed weekday occupancy schedule.

    Every timestamp that falls on Monday-Friday with
    ``start_hour <= hour < end_hour`` gets
    ``max_occupants * occupancy_heat_gain_watts / 1000``; all other
    timestamps get ``0.0``. The mask is computed in one vectorised pass
    instead of one ``.loc`` assignment per timestamp.

    Args:
        index: Datetime-like index the schedule is built for.
        occupancy_heat_gain_watts: Heat gain per person, in watts.
        max_occupants: Number of people assumed present while occupied.
        start_hour: First occupied hour of the day (inclusive).
        end_hour: End of occupancy (exclusive).

    Returns:
        ``pd.Series`` of floats named ``'occupancy_gain_kw'`` on ``index``.
    """
    gain_kw = (max_occupants * occupancy_heat_gain_watts) / 1000.0
    stamps = pd.DatetimeIndex(index)
    occupied = (
        (stamps.hour >= start_hour)
        & (stamps.hour < end_hour)
        & (stamps.weekday < 5)  # 0-4 are Monday-Friday
    )
    return pd.Series(np.where(occupied, gain_kw, 0.0), index=index, name='occupancy_gain_kw')

# --- Part 1: Load Your Pre-Processed Data ---
def load_and_prepare_data(folder_path, start_date='2012-04-01', end_date='2012-04-30'):
    """Assemble the 15-minute NMPC input table from the CSV files in ``folder_path``.

    Steps performed:
      1. Weather: read ``Climate_HourlyWeather.csv`` (or the first file found
         by a set of fallback glob patterns), pick the date, temperature and
         solar columns, and forward-fill onto a 15-minute grid.
      2. Electricity price: a fixed time-of-use tariff by hour of day.
      3. Gas price: an average rate is computed from ``NaturalGas_Billing.csv``
         and printed for diagnostics, but the value stored in ``price_gas`` is
         one third of the mean electricity price. On any failure a fallback
         of 0.03 is used.
      4. HVAC power: every ``Electricity_*.csv`` is resampled to 15-minute
         means and the per-file power columns are summed.
      5. Everything is joined, gaps are filled, an occupancy-gain column is
         added, and the table is cut to ``start_date``..``end_date``.

    Args:
        folder_path: Directory containing the CSV files.
        start_date: First day kept in the returned input table.
        end_date: Last day kept in the returned input table (inclusive).

    Returns:
        Tuple ``(nmpc_input_df, total_hvac_power_df, MAX_HVAC_POWER_KW)``.
        ``nmpc_input_df`` is the sliced 15-minute table. ``total_hvac_power_df``
        has one column ``P_hvac_actual`` aligned to the full (unsliced) weather
        index. ``MAX_HVAC_POWER_KW`` is the maximum summed power, divided by
        1000 when it exceeds 1000 (a unit guess), ``nan`` when no maximum
        exists, and ``0.0`` when no electricity file was usable.

    Raises:
        FileNotFoundError: No weather file was found.
        ValueError: No temperature column was found, a weather timestamp could
            not be parsed, or a required column is missing from the result.
    """
    # --- Task 1: weather data ---
    # Prefer the exact file name, then fall back to progressively looser patterns.
    exact = os.path.join(folder_path, 'Climate_HourlyWeather.csv')
    if os.path.isfile(exact):
        candidates = [exact]
    else:
        candidates = (
            _find_file(folder_path, ["*Climate*Hourly*Weather*.csv", "*Climate*HourlyWeather*.csv", "*Climate_HourlyWeather*.csv", "*HourlyWeather*.csv"])
            or _find_file(folder_path, ["*Climate*.csv", "*Hourly*.csv"])
        )

    if not candidates:
        raise FileNotFoundError(f"Keine Wetterdatei im Ordner gefunden: {folder_path}\nErwartet: Climate_HourlyWeather.csv oder ähnliches.")
    weather_file = candidates[0]

    weather_raw = pd.read_csv(weather_file)
    print(f"Gelesene Wetterdatei: {weather_file}, Zeilen: {len(weather_raw)}, Spalten: {list(weather_raw.columns)[:10]}{'...' if len(weather_raw.columns)>10 else ''}")

    # Use the expected column names when present (either temperature spelling),
    # otherwise locate the columns by keyword.
    cols = set(weather_raw.columns)
    if {'Date/Time', 'GHI (W/m²)'} <= cols and ({'Temp (°C)', 'Temp (C)'} & cols):
        date_col = 'Date/Time'
        temp_col = 'Temp (°C)' if 'Temp (°C)' in cols else 'Temp (C)'
        solar_col = 'GHI (W/m²)'
    else:
        date_col = _find_column(weather_raw, ["date", "time", "timestamp"]) or weather_raw.columns[0]
        temp_col = _find_column(weather_raw, ["temp", "temperature", "air temp"])
        solar_col = _find_column(weather_raw, ["ghi", "solar", "radiation", "irradiance"])

    if temp_col is None:
        raise ValueError(f"Konnten Temperaturspalte nicht finden in {os.path.basename(weather_file)}. Verfügbare Spalten: {list(weather_raw.columns)}")

    # Without a solar column the model still runs; solar_rad is set to 0.
    if solar_col is None:
        weather = weather_raw[[date_col, temp_col]].rename(columns={date_col: 'datetime', temp_col: 'T_outside'})
        weather['solar_rad'] = 0.0
        print(f"Warnung: Keine Solarspalte gefunden in {os.path.basename(weather_file)}. solar_rad wird mit 0 gefüllt.")
    else:
        weather = weather_raw[[date_col, temp_col, solar_col]].rename(columns={date_col: 'datetime', temp_col: 'T_outside', solar_col: 'solar_rad'})

    weather['datetime'] = pd.to_datetime(weather['datetime'], errors='coerce')
    if weather['datetime'].isna().any():
        raise ValueError(f"Fehler beim Parsen von Datumswerten in {os.path.basename(weather_file)}. Prüfen Sie das Format der '{date_col}' Spalte.")

    weather = weather.set_index('datetime').sort_index()

    # Upsample to a 15-minute grid, holding the last observation.
    weather_df = weather.resample('15min').ffill()
    print(f"weather_df resampled auf 15min, range: {weather_df.index.min()} - {weather_df.index.max()}, shape: {weather_df.shape}")

    # --- Task 2: time-of-use electricity price ---
    index = weather_df.index

    def tou_price(ts):
        h = ts.hour
        # Peak: 14 <= hour < 19
        if 14 <= h < 19:
            return 0.28
        # Off-peak: hour >= 22 or hour < 7
        if h >= 22 or h < 7:
            return 0.12
        # Mid-peak otherwise
        return 0.18

    price_series = pd.Series(index=index, data=[tou_price(t) for t in index], name='price')
    price_df = price_series.to_frame()

    print("\nProcessing Natural Gas data with enhanced diagnostics...")
    try:
        billing_file = os.path.join(folder_path, 'NaturalGas_Billing.csv')
        gas_billing_df = pd.read_csv(billing_file)

        # Keep only rows with both values present and a positive billed amount
        # (avoids division by zero below).
        gas_billing_df.dropna(subset=['Commodity charges', 'Billed GJ'], inplace=True)
        gas_billing_df = gas_billing_df[gas_billing_df['Billed GJ'] > 0]

        if gas_billing_df.empty:
            raise ValueError("NaturalGas_Billing.csv is empty or contains no valid entries after cleaning.")

        avg_rate_per_gj = (gas_billing_df['Commodity charges'] / gas_billing_df['Billed GJ']).mean()

        print(f"  Cleaned Gas Billing Rows: {len(gas_billing_df)}")
        print(f"  Average Natural Gas Rate: ${avg_rate_per_gj:.3f}/GJ")

        if pd.isna(avg_rate_per_gj):
            raise ValueError("Calculated average for gas rate is NaN.")

        # $/GJ -> $/kWh via BTU: 1 GJ = 947,817 BTU; 1 kWh = 3412 BTU.
        price_per_btu = avg_rate_per_gj / 947817.0
        price_gas_per_kwh_th = price_per_btu * 3412.0

        print(f"  --> Raw Calculated Gas Price: ${price_gas_per_kwh_th:.4f}/kWh_thermal")

        # NOTE: the raw price above is only reported. The solver is given a
        # price tied to the electricity tariff (one third of its mean).
        avg_elec_price = price_df['price'].mean()
        stable_gas_price = avg_elec_price / 3.0

        print(f"  --> Using Stable Gas Price for Solver: ${stable_gas_price:.4f}/kWh_thermal")
        price_df['price_gas'] = stable_gas_price

    except Exception as e:
        # Deliberate best-effort: any problem with the billing file falls back
        # to a fixed gas price instead of aborting the data preparation.
        print(f"WARNING: Could not process Natural Gas files. Reason: {e}")
        fallback_price = 0.03
        print(f"--> Using a fallback gas price of ${fallback_price:.4f}/kWh_thermal")
        price_df['price_gas'] = fallback_price

    # --- Task 3: aggregate HVAC power from all Electricity_*.csv files ---
    all_csv_files = [os.path.basename(f) for f in glob.glob(os.path.join(folder_path, "*.csv"))]
    # Sorted so the column order of the concatenated frame is reproducible.
    hvac_files = sorted(f for f in all_csv_files if f.startswith('Electricity_'))

    hvac_series_list = []
    for fname in hvac_files:
        fpath = os.path.join(folder_path, fname)
        try:
            df = pd.read_csv(fpath)
        except Exception as e:
            print(f"WARNING: Skipping {fname}: could not read CSV ({e})")
            continue

        ts_col = _find_column(df, ['unix', 'unix_ts', 'ts', 'time', 'timestamp', 'date'])
        if ts_col is None:
            print(f"WARNING: Skipping {fname}: no timestamp column found")
            continue

        # Numeric timestamps are interpreted as epoch seconds first.
        if np.issubdtype(df[ts_col].dtype, np.number):
            try:
                df['datetime'] = pd.to_datetime(df[ts_col], unit='s')
            except Exception:
                df['datetime'] = pd.to_datetime(df[ts_col], errors='coerce')
        else:
            df['datetime'] = pd.to_datetime(df[ts_col], errors='coerce')

        # Rows whose timestamp could not be parsed cannot be placed on the grid.
        df = df.dropna(subset=['datetime'])
        if df.empty:
            print(f"WARNING: Skipping {fname}: no parseable timestamps")
            continue

        # Choose the active-power column. Exact names (P, Pt, power) are tried
        # first: a plain substring search for 'p' would also match unrelated
        # columns such as 'timestamp' or a power-factor column.
        value_cols = [c for c in df.columns if c not in (ts_col, 'datetime')]
        by_lower = {}
        for c in value_cols:
            by_lower.setdefault(str(c).strip().lower(), c)
        power_col = next((by_lower[k] for k in ('p', 'pt', 'power') if k in by_lower), None)
        if power_col is None:
            power_col = _find_column(df[value_cols], ['power', 'active'])
        if power_col is None:
            # Last keyword resort: the original loose substring match.
            power_col = _find_column(df[value_cols], ['p', 'pt'])
        if power_col is None:
            # Fallback: first numeric column that is not the timestamp.
            numeric_cols = df[value_cols].select_dtypes(include=[np.number]).columns.tolist()
            if not numeric_cols:
                print(f"WARNING: Skipping {fname}: no power column found")
                continue
            power_col = numeric_cols[0]

        temp = df[['datetime', power_col]].copy().set_index('datetime').sort_index()
        temp_15 = temp.resample('15min').mean()
        col_name = os.path.splitext(fname)[0]
        temp_15 = temp_15.rename(columns={power_col: col_name})
        hvac_series_list.append(temp_15)

    if hvac_series_list:
        hvac_concat = pd.concat(hvac_series_list, axis=1)
        hvac_concat = hvac_concat.resample('15min').mean()
        # Row-wise sum across files gives the total power.
        hvac_concat['P_hvac_actual'] = hvac_concat.sum(axis=1)
        max_val = hvac_concat['P_hvac_actual'].max()
        print(f"Ermittelte MAX HVAC raw: {max_val}")
        if pd.isna(max_val):
            MAX_HVAC_POWER_KW = np.nan
        elif max_val > 1000:
            # Unit guess: a very large maximum is assumed to be in W.
            # NOTE(review): only this scalar is rescaled; P_hvac_actual keeps
            # the raw unit of the files.
            MAX_HVAC_POWER_KW = float(max_val) / 1000.0
        else:
            MAX_HVAC_POWER_KW = float(max_val)
        total_hvac_power_df = hvac_concat[['P_hvac_actual']]
    else:
        total_hvac_power_df = pd.DataFrame(index=weather_df.index)
        total_hvac_power_df['P_hvac_actual'] = 0.0
        MAX_HVAC_POWER_KW = 0.0

    # --- Task 4: final assembly ---
    nmpc_input_df = weather_df.join(price_df, how='left')
    # .ffill()/.bfill() replace the deprecated fillna(method=...) spelling.
    nmpc_input_df['price'] = nmpc_input_df['price'].ffill().bfill()

    # Align the HVAC series to the (still unsliced) input index.
    total_hvac_power_df = total_hvac_power_df.reindex(nmpc_input_df.index).ffill().fillna(0.0)

    # Fill any remaining gaps in the input table.
    nmpc_input_df = nmpc_input_df.ffill().bfill()

    print("Creating occupancy schedule...")
    occupancy_schedule = create_occupancy_schedule(weather_df.index)
    nmpc_input_df = nmpc_input_df.join(occupancy_schedule)

    # Keep only the requested date window (label slicing, end inclusive).
    nmpc_input_df = nmpc_input_df.loc[start_date:end_date]
    if nmpc_input_df.empty:
        print(f"WARNING: No data between {start_date} and {end_date}; the returned input table is empty.")

    # Ensure a regular 15-minute DatetimeIndex.
    try:
        nmpc_input_df = nmpc_input_df.asfreq('15min')
    except Exception:
        # If asfreq is not possible, rebuild the index explicitly.
        start = nmpc_input_df.index.min()
        end = nmpc_input_df.index.max()
        new_idx = pd.date_range(start=start, end=end, freq='15min')
        nmpc_input_df = nmpc_input_df.reindex(new_idx).ffill().bfill()

    # Verify the columns the solver needs.
    required_cols = ['T_outside', 'solar_rad', 'price']
    missing = [c for c in required_cols if c not in nmpc_input_df.columns]
    if missing:
        raise ValueError(f"Fehlende erforderliche Spalten im Ergebnis: {missing}")
    print(f"nmpc_input_df erstellt, shape: {nmpc_input_df.shape}, index start: {nmpc_input_df.index.min()}, end: {nmpc_input_df.index.max()}")

    return nmpc_input_df, total_hvac_power_df, MAX_HVAC_POWER_KW


def setup_nmpc_solver(dt_seconds, N, max_elec_power_kw, max_gas_power_kw, T_min, T_max, T_target):
    """
    Build a CasADi ``Opti`` problem for a two-state (air / wall) dual-fuel NMPC.

    The problem has decision variables for the air and wall temperatures over
    N+1 points, electric and gas power over N steps, and one lower and one
    upper slack per temperature point. Initial states, forecasts and prices
    are ``Opti`` parameters that the caller sets before each solve.

    Args:
        dt_seconds: Step length; used as ``dt_seconds / 3600.0`` in cost and dynamics.
        N: Horizon length (number of control steps).
        max_elec_power_kw: Bound on ``P_elec``; applied symmetrically (-max..+max).
        max_gas_power_kw: Upper bound on ``P_gas`` (lower bound is 0).
        T_min: Lower air-temperature bound, relaxed by ``slack_T_lower``.
        T_max: Upper air-temperature bound, relaxed by ``slack_T_upper``.
        T_target: Air-temperature setpoint used in the quadratic tracking term.

    Returns:
        dict with the ``Opti`` instance (``"opti"``), the decision variables
        (``"T_air"``, ``"T_wall"``, ``"P_elec"``, ``"P_gas"``), all parameters,
        and ``"constraints"`` holding the two temperature-bound constraint
        expressions (keys ``"T_lower_hard"`` / ``"T_upper_hard"``; note that
        both expressions include the slack variables).

    NOTE(review): physical units of the model constants are not stated in the
    code; the parameter names suggest kW and the /3600 suggests hours — confirm.
    """
    print("Setting up 2nd-Order DUAL FUEL NMPC solver...")
    opti = ca.Opti()

    # --- System Model Parameters (2nd Order) ---
    # These same literals are repeated in run_simulation's plant update;
    # keep the two places in sync.
    C_air, C_wall = 2.0, 40.0       # Thermal Capacitance (Air, Wall)
    R_air_wall, R_wall_out = 0.5, 8.0 # Thermal Resistance (Air-Wall, Wall-Outside)
    eta_gas = 0.90  # factor applied to P_gas in total_heat_power below

    # --- State and Control Variables ---
    # The order of these opti.variable() calls fixes the layout of the
    # decision vector; do not reorder casually.
    T_air = opti.variable(1, N + 1)
    T_wall = opti.variable(1, N + 1)
    P_elec = opti.variable(1, N)
    P_gas = opti.variable(1, N)
    slack_T_lower = opti.variable(1, N + 1)
    slack_T_upper = opti.variable(1, N + 1)

    # --- Parameters ---
    T_air_current_param = opti.parameter(1, 1)
    T_wall_current_param = opti.parameter(1, 1)
    T_outside_forecast_param = opti.parameter(1, N)
    solar_rad_forecast_param = opti.parameter(1, N)
    price_elec_forecast_param = opti.parameter(1, N)
    price_gas_forecast_param = opti.parameter(1, N)
    occupancy_gain_param = opti.parameter(1, N) # New disturbance parameter

    # --- Cost Function ---
    cost = 0
    for k in range(N):
        # sqrt(P^2 + 1e-6) is a smooth stand-in for |P_elec|, so both signs
        # of electric power are charged.
        smooth_abs_elec = ca.sqrt(P_elec[k]**2 + 1e-6)
        cost += price_elec_forecast_param[k] * smooth_abs_elec * (dt_seconds / 3600.0)
        cost += price_gas_forecast_param[k] * P_gas[k] * (dt_seconds / 3600.0)
        cost += 1.0 * (T_air[k] - T_target)**2 # Penalize AIR temperature

    # Quadratic penalty (weight 1000) on both slack vectors.
    cost += 1000.0 * (ca.sumsqr(slack_T_lower) + ca.sumsqr(slack_T_upper))
    opti.minimize(cost)

    # --- System Dynamics (2nd Order) ---
    # One explicit Euler step per k, imposed as equality constraints.
    for k in range(N):
        total_heat_power = P_elec[k] + (eta_gas * P_gas[k])
        A_sol, eta_sol = 5.0, 0.6

        # Air node: exchange with wall + HVAC + occupancy + solar term (/1000).
        T_air_next = T_air[k] + (dt_seconds / 3600.0) / C_air * ((T_wall[k] - T_air[k]) / R_air_wall + total_heat_power + occupancy_gain_param[k] + (A_sol * eta_sol * solar_rad_forecast_param[k]) / 1000)
        # Wall node: exchange with air + exchange with outside.
        T_wall_next = T_wall[k] + (dt_seconds / 3600.0) / C_wall * ((T_air[k] - T_wall[k]) / R_air_wall + (T_outside_forecast_param[k] - T_wall[k]) / R_wall_out)

        opti.subject_to(T_air[k+1] == T_air_next)
        opti.subject_to(T_wall[k+1] == T_wall_next)

    # --- Constraints ---
    # Pin the first state to the measured/current values.
    opti.subject_to(T_air[0] == T_air_current_param)
    opti.subject_to(T_wall[0] == T_wall_current_param)

    # Temperature bounds relaxed by non-negative slacks; the constraint
    # expressions are kept so the caller can query their dual values.
    lower_temp_constraint = (T_air >= T_min - slack_T_lower)
    opti.subject_to(lower_temp_constraint); opti.subject_to(slack_T_lower >= 0)
    upper_temp_constraint = (T_air <= T_max + slack_T_upper)
    opti.subject_to(upper_temp_constraint); opti.subject_to(slack_T_upper >= 0)

    # Actuator limits: electric power may be negative, gas power may not.
    opti.subject_to(P_elec >= -max_elec_power_kw); opti.subject_to(P_elec <= max_elec_power_kw)
    opti.subject_to(P_gas >= 0); opti.subject_to(P_gas <= max_gas_power_kw)

    # --- Configure Solver ---
    opts = {
        'ipopt.print_level': 0,
        'print_time': 0,
        'ipopt.tol': 1e-5,
        'ipopt.acceptable_tol': 1e-3,
        'ipopt.max_iter': 3000,
        'ipopt.mu_strategy': 'adaptive',
        'ipopt.linear_solver': 'mumps'
    }
    opti.solver('ipopt', opts)
    print("NMPC solver setup complete.")

    return { "opti": opti, "T_air": T_air, "T_wall": T_wall, "P_elec": P_elec, "P_gas": P_gas,
        "T_air_current_param": T_air_current_param, "T_wall_current_param": T_wall_current_param,
        "T_outside_forecast_param": T_outside_forecast_param, "solar_rad_forecast_param": solar_rad_forecast_param,
        "price_elec_forecast_param": price_elec_forecast_param, "price_gas_forecast_param": price_gas_forecast_param,
        "occupancy_gain_param": occupancy_gain_param,
        "constraints": {"T_lower_hard": lower_temp_constraint, "T_upper_hard": upper_temp_constraint} }

def run_simulation(nmpc_solver_obj, data, dt_seconds, N, T_air_initial, T_wall_initial):
    """Run the receding-horizon loop over ``data`` and collect the results.

    For each of the ``len(data) - N`` steps the solver parameters are set from
    the current state and the next ``N`` rows of ``data``, the problem is
    solved (warm-started from the previous solution), the first control move
    is applied to a two-state plant update, and the step is logged.

    Args:
        nmpc_solver_obj: Dict returned by ``setup_nmpc_solver``.
        data: DataFrame with columns ``T_outside``, ``solar_rad``, ``price``,
            ``price_gas`` and ``occupancy_gain_kw``.
        dt_seconds: Step length, used as ``dt_seconds / 3600.0``.
        N: Horizon length.
        T_air_initial: Starting air temperature.
        T_wall_initial: Starting wall temperature.

    Returns:
        ``data`` inner-joined with the per-step results (states, one-step
        predictions, optimal cost, controls and the duals at horizon index 1).
    """
    print("Starting 2nd-Order DUAL FUEL NMPC simulation...")
    opti = nmpc_solver_obj['opti']
    air_var = nmpc_solver_obj['T_air']
    wall_var = nmpc_solver_obj['T_wall']
    elec_var = nmpc_solver_obj['P_elec']
    gas_var = nmpc_solver_obj['P_gas']
    air_now_param = nmpc_solver_obj['T_air_current_param']
    wall_now_param = nmpc_solver_obj['T_wall_current_param']
    # (parameter, data column) pairs that are refreshed from the forecast window.
    forecast_inputs = (
        (nmpc_solver_obj['T_outside_forecast_param'], 'T_outside'),
        (nmpc_solver_obj['solar_rad_forecast_param'], 'solar_rad'),
        (nmpc_solver_obj['price_elec_forecast_param'], 'price'),
        (nmpc_solver_obj['price_gas_forecast_param'], 'price_gas'),
        (nmpc_solver_obj['occupancy_gain_param'], 'occupancy_gain_kw'),
    )
    constraints = nmpc_solver_obj['constraints']

    # Plant constants (same literals as used when the solver was set up).
    C_air, C_wall, R_air_wall, R_wall_out, eta_gas = 2.0, 40.0, 0.5, 8.0, 0.90
    A_sol, eta_sol = 5.0, 0.6

    # One list per output column, in the column order of the result frame.
    history = {
        'T_air_internal': [], 'T_wall_internal': [],
        'T_air_predicted': [],
        'T_wall_predicted': [],
        'cost_optimal': [],
        'P_elec_optimal': [], 'P_gas_optimal': [],
        'dual_T_lower': [], 'dual_T_upper': [],
    }

    air_now, wall_now = T_air_initial, T_wall_initial
    num_steps = len(data) - N
    for step in range(num_steps):
        if step % 200 == 0:
            print(f"Simulating step {step}/{num_steps}...")

        window = data.iloc[step : step + N]
        opti.set_value(air_now_param, air_now)
        opti.set_value(wall_now_param, wall_now)
        for param, column in forecast_inputs:
            opti.set_value(param, window[column].values)

        sol = opti.solve()
        # Warm-start the next solve from this solution.
        opti.set_initial(sol.value_variables())

        u_elec = sol.value(elec_var[:, 0])
        u_gas = sol.value(gas_var[:, 0])
        air_plan = sol.value(air_var)
        wall_plan = sol.value(wall_var)
        history['cost_optimal'].append(sol.value(opti.f))
        lower_duals = sol.value(opti.dual(constraints['T_lower_hard']))
        upper_duals = sol.value(opti.dual(constraints['T_upper_hard']))

        history['T_air_internal'].append(air_now)
        history['T_wall_internal'].append(wall_now)
        history['P_elec_optimal'].append(u_elec)
        history['P_gas_optimal'].append(u_gas)
        # Index 1 = the point one step ahead of the current state.
        history['dual_T_lower'].append(lower_duals[1])
        history['dual_T_upper'].append(upper_duals[1])
        history['T_air_predicted'].append(air_plan[1])
        history['T_wall_predicted'].append(wall_plan[1])

        # Advance the "real" two-state plant by one step. Both updates use
        # the temperatures from the start of the step.
        row = data.iloc[step]
        hours = dt_seconds / 3600.0
        heat_in = u_elec + (eta_gas * u_gas)
        air_next = air_now + hours / C_air * ((wall_now - air_now) / R_air_wall + heat_in + row['occupancy_gain_kw'] + (A_sol * eta_sol * row['solar_rad']) / 1000)
        wall_next = wall_now + hours / C_wall * ((air_now - wall_now) / R_air_wall + (row['T_outside'] - wall_now) / R_wall_out)
        air_now, wall_now = air_next, wall_next

    print("Simulation finished.")
    results_df = pd.DataFrame(history, index=data.index[:num_steps])
    return data.join(results_df, how='inner')

def run_counterfactual_analysis(nmpc_solver_obj, data_point, forecast_slice, dt_seconds, N):
    """
    Solve one-off "what if" variants of the NMPC problem.

    Currently one scenario is evaluated: gas power forced to zero. A copy of
    the solver's ``Opti`` problem gets the extra constraint ``P_gas == 0``,
    is parameterised from ``data_point`` / ``forecast_slice`` and solved; the
    difference between its objective and ``data_point['cost_optimal']`` is
    reported.

    ``dt_seconds`` and ``N`` are accepted but not used here.

    Returns:
        dict with key ``'electric_only_cost_increase'`` holding the cost
        difference, or the string ``'N/A (Solver Failed)'`` if anything in the
        scenario raised.
    """
    outcome = {}

    # Scenario 1: only electricity may be used for heating.
    forecast_bindings = (
        ('T_outside_forecast_param', 'T_outside'),
        ('solar_rad_forecast_param', 'solar_rad'),
        ('price_elec_forecast_param', 'price'),
        ('price_gas_forecast_param', 'price_gas'),
        ('occupancy_gain_param', 'occupancy_gain_kw'),
    )
    try:
        # Work on a copy so the extra constraint does not leak into the
        # solver used by the main simulation.
        scenario = nmpc_solver_obj['opti'].copy()
        scenario.subject_to(nmpc_solver_obj['P_gas'] == 0)

        scenario.set_value(nmpc_solver_obj['T_air_current_param'], data_point['T_air_internal'])
        scenario.set_value(nmpc_solver_obj['T_wall_current_param'], data_point['T_wall_internal'])
        for param_key, column in forecast_bindings:
            scenario.set_value(nmpc_solver_obj[param_key], forecast_slice[column].values)

        solution = scenario.solve()
        electric_only_cost = solution.value(scenario.f)
        outcome['electric_only_cost_increase'] = electric_only_cost - data_point['cost_optimal']
    except Exception:
        outcome['electric_only_cost_increase'] = 'N/A (Solver Failed)'

    return outcome

def generate_publication_ready_hca_explanation(timestamp_str, results_df, nmpc_solver_obj, dt_seconds, N, kg, pcmci_links, node_to_column, T_min, T_max, significance_threshold=0.01):
    """Generate a quantitative explanation of the controller action at one time.

    The row of ``results_df`` at (or immediately before) ``timestamp_str`` is
    selected. The explanation is then chosen hierarchically from that row's
    constraint multipliers:

      1. ``dual_T_lower`` above the threshold  -> defensive heating,
      2. ``|dual_T_upper|`` above the threshold -> defensive cooling,
      3. otherwise                              -> economic trade-off.

    Physical and historical context come from ``query_knowledge_graph`` and
    ``query_pcmci_model`` (defined elsewhere in this module). For an economic
    gas-heating action an electric-only counterfactual is solved via
    ``run_counterfactual_analysis``.

    A short header (timestamp, action, state) is printed as a side effect.

    Args:
        timestamp_str: Timestamp to explain (anything ``pd.to_datetime`` accepts).
        results_df: Simulation results with a sorted DatetimeIndex.
        nmpc_solver_obj: Solver dict, forwarded to the counterfactual analysis.
        dt_seconds: Step length in seconds; sets the forecast-window length.
        N: Horizon length in steps.
        kg: Knowledge graph passed to ``query_knowledge_graph``.
        pcmci_links: Passed through to ``query_pcmci_model``.
        node_to_column: Mapping passed to ``query_knowledge_graph``.
        T_min: Lower comfort bound (used in the text only).
        T_max: Upper comfort bound (used in the text only).
        significance_threshold: Multiplier magnitude above which a temperature
            constraint is treated as the dominant driver.

    Returns:
        The explanation as a markdown-style string, or an error string when
        the timestamp cannot be located in ``results_df``.
    """
    try:
        current_timestamp = pd.to_datetime(timestamp_str)
        data_point = results_df.loc[results_df.index.asof(current_timestamp)]
        actual_timestamp_str = str(data_point.name)
        # Window of N steps starting at the selected row. Label slicing is
        # end-inclusive, so stop one minute short of the (N+1)-th step.
        horizon_end = data_point.name + pd.Timedelta(seconds=N * dt_seconds) - pd.Timedelta(minutes=1)
        forecast_slice = results_df.loc[data_point.name : horizon_end]
    except KeyError:
        return f"Error: Timestamp {timestamp_str} not found in results."

    # --- 1. Observed action and system state ---
    p_elec, p_gas = data_point['P_elec_optimal'], data_point['P_gas_optimal']
    t_air, t_wall = data_point['T_air_internal'], data_point['T_wall_internal']
    action_desc, action_type = "No significant HVAC action.", "nominal"
    gas_heating = False
    if p_elec < -0.1:
        action_desc, action_type = f"activated ELECTRIC COOLING at {-p_elec:.2f} kW", "cooling"
    elif p_gas > 0.1:
        action_desc, action_type = f"activated GAS HEATING at {p_gas:.2f} kW", "heating"
        gas_heating = True
    elif p_elec > 0.1:
        action_desc, action_type = f"activated ELECTRIC HEATING at {p_elec:.2f} kW", "heating"

    print(f"\n--- HCA Explanation for {actual_timestamp_str} ---")
    print(f"Observed Action: Controller {action_desc}")
    print(f"System State: Air Temp={t_air:.1f}°C, Wall Temp={t_wall:.1f}°C (Comfort Band: {T_min}-{T_max}°C)")

    # --- 2. Hierarchical analysis on the multipliers of the selected row ---
    dual_T_lower, dual_T_upper = data_point['dual_T_lower'], abs(data_point['dual_T_upper'])

    primary_reason, math_evidence, physical_context, historical_context, counterfactual = "", "", "", "", ""

    if dual_T_lower > significance_threshold:
        # Defensive heating
        primary_reason = "Defensive Action: The controller's primary goal was to prevent the air temperature from violating the lower comfort bound."
        math_evidence = f"The Lagrangian multiplier for the lower temperature constraint was high (λ_T_min = {dual_T_lower:.2f}, threshold={significance_threshold}), indicating this constraint was the dominant factor in the decision."
        physical_context = query_knowledge_graph(kg, 'lower_temp_bound_active', data_point, node_to_column)
        historical_context = query_pcmci_model(pcmci_links, 'heating', current_timestamp, results_df)
        counterfactual = f"Without this action, the model predicted the temperature would violate the {T_min}°C limit."

    elif dual_T_upper > significance_threshold:
        # Defensive cooling
        primary_reason = "Defensive Action: The controller's primary goal was to prevent the air temperature from violating the upper comfort bound."
        math_evidence = f"The Lagrangian multiplier for the upper temperature constraint was high (λ_T_max = {dual_T_upper:.2f}, threshold={significance_threshold}), indicating this constraint was the dominant factor."
        physical_context = query_knowledge_graph(kg, 'upper_temp_bound_active', data_point, node_to_column)
        historical_context = query_pcmci_model(pcmci_links, 'cooling', current_timestamp, results_df)
        counterfactual = f"Without this action, the model predicted the temperature would violate the {T_max}°C limit."

    else:
        # Economic trade-off
        primary_reason = "Economic Trade-off: The controller chose the most cost-effective action to keep the air temperature near the 22.0°C target."
        math_evidence = f"All temperature constraints were inactive (λ_T_min = {dual_T_lower:.4f} < {significance_threshold}), proving the decision was driven by cost optimization, not immediate constraint risk."

        if gas_heating:
            physical_context = f"The controller chose gas heating because its price (${data_point['price_gas']:.3f}/kWh) was significantly cheaper than electricity (${data_point['price']:.3f}/kWh)."
            cf_results = run_counterfactual_analysis(nmpc_solver_obj, data_point, forecast_slice, dt_seconds, N)
            cost_increase = cf_results.get('electric_only_cost_increase', 'N/A')
            if isinstance(cost_increase, float):
                counterfactual = f"If gas had been unavailable, using the electric heat pump would have increased the projected horizon cost by ${cost_increase:.4f}."
            else:
                counterfactual = "The electric-only counterfactual scenario was not solvable."
        elif action_type == "heating":
            # Electric heating was selected; the gas-price rationale and the
            # electric-only counterfactual do not describe this action.
            physical_context = f"The controller chose electric heating (electricity price ${data_point['price']:.3f}/kWh, gas price ${data_point['price_gas']:.3f}/kWh)."
            counterfactual = "Not applicable: the electric-only counterfactual only applies to gas heating."
        else:
            physical_context = "The primary driver was to counteract minor thermal disturbances to stay close to the 22.0°C target."
            counterfactual = "Not applicable for a non-heating economic action."

        historical_context = query_pcmci_model(pcmci_links, action_type, current_timestamp, results_df)

    # --- 3. Synthesize ---
    explanation = (
        f"**Primary Reason:** {primary_reason}\n\n"
        f"**Mathematical Evidence:** {math_evidence}\n\n"
        f"**Physical Context:** {physical_context}\n\n"
        f"**Historical Context:** {historical_context}\n\n"
        f"**Counterfactual:** {counterfactual}"
    )
    return explanation

# --- NEW: Part 6a: Knowledge Graph Simulator ---
def setup_knowledge_graph():
    """
    Build the dictionary-based knowledge graph used for physical explanations.

    Every node maps to ``{'type': ..., 'display_name': ...}``. The extra key
    ``'relationships'`` holds the edges as ``[source, target, description]``
    lists. A new dictionary is returned on every call.
    """
    # (node name, node type, human-readable label)
    node_specs = (
        ('T_air', 'State', 'Indoor Air Temperature'),
        ('T_wall', 'State', 'Building Mass Temperature'),
        ('Tout', 'Disturbance', 'Outside Temperature'),
        ('Qrad', 'Disturbance', 'Solar Radiation'),
        ('occupancy', 'Disturbance', 'Occupancy Heat Gain'),
        ('P_elec', 'Control', 'Electric HVAC'),
        ('P_gas', 'Control', 'Gas Furnace'),
        # Fluxes acting on the air temperature
        ('HeatLoss_Air_Wall', 'Flux', 'Heat Exchange with Walls'),
        ('HeatGain_HVAC', 'Flux', 'HVAC Output'),
        # Flux acting on the wall temperature
        ('HeatLoss_Wall_Out', 'Flux', 'Heat Loss to Outside'),
    )
    graph = {
        name: {'type': kind, 'display_name': label}
        for name, kind, label in node_specs
    }

    # (source, target, description)
    edge_specs = (
        ('Tout', 'HeatLoss_Wall_Out', 'drives'),
        ('HeatLoss_Wall_Out', 'T_wall', 'cools'),
        ('T_wall', 'HeatLoss_Air_Wall', 'influences'),
        ('HeatLoss_Air_Wall', 'T_air', 'stabilizes'),
        ('Qrad', 'T_air', 'heats'),
        ('occupancy', 'T_air', 'heats'),
        ('P_elec', 'HeatGain_HVAC', 'powers'),
        ('P_gas', 'HeatGain_HVAC', 'powers'),
        ('HeatGain_HVAC', 'T_air', 'conditions'),
    )
    # Edges are stored as lists, matching what consumers index into.
    graph['relationships'] = [list(edge) for edge in edge_specs]
    return graph

def query_knowledge_graph(kg: dict, problem_type: str, data_point: pd.Series, node_to_column: dict) -> str:
    """
    Trace the disturbances that causally feed the indoor air temperature.

    Starting from 'T_air', the edges whose description matches the active
    problem ('cools'/'stabilizes' for a lower-bound problem, 'heats' for an
    upper-bound problem) are followed backwards through the whole graph, and
    every upstream node of type 'Disturbance' is inspected.

    Args:
        kg: Graph dict with node entries and a 'relationships' list of
            [source, target, description] edges.
        problem_type: Text containing 'lower_temp' or 'upper_temp'; anything
            else is treated as nominal.
        data_point: Row of results holding the measured disturbance values.
        node_to_column: Maps graph node keys to column names in ``data_point``.
            ``None`` is treated as an empty mapping.

    Returns:
        A narrative sentence describing the contributing disturbances, or a
        fallback sentence when none qualifies.
    """
    start_node = 'T_air'
    lower_bound_active = 'lower_temp' in problem_type
    if lower_bound_active:
        effects_to_find = ['cools', 'stabilizes']
    elif 'upper_temp' in problem_type:
        effects_to_find = ['heats']
    else:
        return "The physical context is nominal."

    relationships = kg.get('relationships', [])
    node_to_column = node_to_column or {}

    # Index incoming edges so the backward walk is a simple lookup.
    parents = {}
    for rel in relationships:
        parents.setdefault(rel[1], []).append(rel[0])

    # Seed with the direct parents of T_air whose edge carries a matching effect.
    # The seed itself may already be a disturbance (e.g. a direct 'heats' edge).
    frontier = [
        rel[0] for rel in relationships
        if rel[1] == start_node and any(effect in rel[2] for effect in effects_to_find)
    ]
    visited = {start_node}
    disturbances = []
    while frontier:
        node = frontier.pop(0)
        if node in visited:
            continue  # also protects against cycles in the graph
        visited.add(node)
        info = kg.get(node)
        if isinstance(info, dict) and info.get('type') == 'Disturbance':
            disturbances.append(node)
        frontier.extend(parents.get(node, []))

    trace = []
    for node in disturbances:
        column_name = node_to_column.get(node)
        if column_name is None:
            continue
        value = data_point.get(column_name)
        # Check if the value is a valid number before comparing
        if not pd.notna(value):
            continue

        # Match on key and display name: the key 'Tout' alone does not contain 'temp'.
        label = f"{node} {kg[node].get('display_name', '')}".lower()
        if 'temp' in label:
            if value < 10:
                trace.append(f"a low outside temperature of {value:.1f}°C")
        elif 'rad' in label:
            # Missing solar gain only explains a heating (lower-bound) action.
            if lower_bound_active and value < 50:
                trace.append("a lack of solar radiation")
        elif 'occupancy' in label:
            if value > 0:
                # Assuming 100W per person (0.1 kW)
                num_occupants = value / 0.1
                trace.append(f"heat gain from an estimated {num_occupants:.0f} occupants")

    if not trace:
        return "Physical context: The system was responding to internal dynamics rather than strong external disturbances."

    return f"Although the building's internal conditions were stable, the action was prompted by external factors, including {', and '.join(trace)}."

# --- NEW: Part 6b: PCMCI Causal Model Simulator ---
def setup_pcmci_model():
    """
    Return the dictionary standing in for a pre-computed PCMCI model.

    Each action name maps to its parent variable and the time lag (in hours)
    at which that parent is looked up.
    """
    lag_hours_by_action = (('heating', 2), ('cooling', 3))
    return {
        action: {'parent': 'T_outside', 'lag_hours': lag}
        for action, lag in lag_hours_by_action
    }

def query_pcmci_model(pcmci_links, action_type, current_timestamp, full_history_df):
    """
    Simulates a query to the PCMCI model to provide historical context.

    Looks up the causal parent of ``action_type`` at ``current_timestamp``
    minus the configured lag and reports whether that value lies more than
    1.5 standard deviations from the mean of the whole history.

    Args:
        pcmci_links: Mapping action -> {'parent': column, 'lag_hours': hours}.
        action_type: Action name; actions missing from ``pcmci_links`` get the
            generic "typical operational patterns" answer.
        current_timestamp: Timestamp of the action being explained.
        full_history_df: Time-indexed frame containing the parent column.

    Returns:
        A one-sentence description of the historical context.
    """
    typical = "This action is consistent with typical operational patterns."
    if action_type not in pcmci_links:
        return typical

    link = pcmci_links[action_type]
    parent_var = link['parent']
    historical_timestamp = current_timestamp - pd.Timedelta(hours=link['lag_hours'])
    tolerance = pd.Timedelta(hours=1)

    try:
        # Get the historical data slice around the relevant time
        history_slice = full_history_df.loc[historical_timestamp - tolerance:historical_timestamp + tolerance]
        if history_slice.empty:
            return "No historical precedent found for this specific time."

        # Reference statistics are taken over the full supplied history.
        baseline_mean = full_history_df[parent_var].mean()
        baseline_std = full_history_df[parent_var].std()

        # Use the sample closest to the lagged time. The first row of the slice
        # would be up to one hour earlier than the lag the message reports.
        offsets = np.abs(np.asarray((history_slice.index - historical_timestamp).total_seconds(), dtype=float))
        historical_value = history_slice[parent_var].iloc[int(offsets.argmin())]

        if parent_var == 'T_outside' and historical_value < (baseline_mean - 1.5 * baseline_std):
            return f"This response is consistent with historical patterns. The data shows a significant drop in outside temperature {link['lag_hours']} hours prior."
        if parent_var == 'T_outside' and historical_value > (baseline_mean + 1.5 * baseline_std):
            return f"This response is consistent with historical patterns. The data shows a significant rise in outside temperature {link['lag_hours']} hours prior."

    except (KeyError, IndexError):
        return "Insufficient historical data to check precedent."

    return typical

# --- Part 4: Saving and Visualization ---
def save_and_visualize(results_df, output_csv_path, T_min, T_max, T_target):
    """
    Write the results to CSV and draw the three-panel overview figure.

    The figure (temperatures, power, dual variables) is saved to
    "nmpc_final_model_plot.png" and then shown.
    """
    print(f"Saving results to {output_csv_path}...")
    results_df.to_csv(output_csv_path)
    print("Results saved.")

    print("Generating plots...")
    fig, axes = plt.subplots(3, 1, figsize=(15, 12), sharex=True)
    ax_temp, ax_power, ax_dual = axes
    times = results_df.index

    # Panel 1: temperatures with the comfort band and target
    ax_temp.plot(times, results_df['T_air_internal'], label='Internal Air Temp (°C)', zorder=10)
    ax_temp.plot(times, results_df['T_wall_internal'], label='Internal Wall Temp (°C)', linestyle=':', alpha=0.8, color='gray')
    ax_temp.plot(times, results_df['T_outside'], label='Outside Temp (°C)', linestyle='--', alpha=0.7)
    reference_lines = (
        (T_min, 'r', '--', f'Comfort Min Temp ({T_min}°C)'),
        (T_max, 'r', '--', f'Comfort Max Temp ({T_max}°C)'),
        (T_target, 'g', ':', f'Target Temp ({T_target}°C)'),
    )
    for level, colour, dash, caption in reference_lines:
        ax_temp.axhline(level, color=colour, linestyle=dash, label=caption)
    ax_temp.set_ylabel('Temperature (°C)')
    ax_temp.legend()
    ax_temp.grid(True)
    ax_temp.set_title('Temperatures (2nd Order Model)')

    # Panel 2: electric and gas power
    ax_power.plot(times, results_df['P_elec_optimal'], label='Electric Power (kW)', color='purple', alpha=0.8)
    ax_power.plot(times, results_df['P_gas_optimal'], label='Gas Power (kW_th)', color='orange', linestyle='--')
    ax_power.set_ylabel('Power (kW)')
    ax_power.legend()
    ax_power.grid(True)

    # Panel 3: dual variables, upper bound on a secondary y-axis
    ax_dual_upper = ax_dual.twinx()
    ax_dual.plot(times, results_df['dual_T_lower'], label='Dual Var (Lower Temp Bound)', color='blue')
    ax_dual_upper.plot(times, results_df['dual_T_upper'].abs(), label='Dual Var (Upper Temp Bound)', color='red')
    ax_dual.set_ylabel('Dual (Lower Bound)', color='blue')
    ax_dual_upper.set_ylabel('Dual (Upper Bound)', color='red')
    ax_dual.set_xlabel('Time')
    ax_dual.legend(loc='upper left')
    ax_dual_upper.legend(loc='upper right')
    ax_dual.set_title('Lagrangian Multipliers (Evidence for HCA)')
    ax_dual.grid(True)

    plt.tight_layout()
    plt.savefig("nmpc_final_model_plot.png")
    print("Plots saved to nmpc_final_model_plot.png")
    plt.show()

def generate_hca_explanation(timestamp_str, results_df, T_min, T_max, kg, pcmci_links, significance_threshold=50.0, node_to_column=None):
    """
    Generates a human-readable explanation for a dual-fuel system,
    integrating all three pillars of the HCA framework.

    Args:
        timestamp_str: Timestamp to explain; parsed with ``pd.to_datetime``.
        results_df: Time-indexed results with the power, price and dual columns.
        T_min, T_max: Comfort limits quoted in the explanation text.
        kg: Knowledge graph passed to ``query_knowledge_graph``.
        pcmci_links: PCMCI links passed to ``query_pcmci_model``.
        significance_threshold: A dual variable above this value marks its
            temperature bound as the primary driver.
        node_to_column: Mapping of graph nodes to result columns; ``None`` is
            treated as an empty mapping.

    Returns:
        The four-line explanation, or an "Error: ..." string when the timestamp
        cannot be parsed or is not present in ``results_df``.
    """
    try:
        current_timestamp = pd.to_datetime(timestamp_str)
    except (ValueError, TypeError):
        # Unparseable input is reported like any other lookup failure.
        return f"Error: Timestamp {timestamp_str} could not be parsed."
    try:
        data_point = results_df.loc[current_timestamp]
    except KeyError:
        return f"Error: Timestamp {timestamp_str} not found in results."
    if isinstance(data_point, pd.DataFrame):
        # Duplicate index labels yield a frame; explain the first matching row.
        data_point = data_point.iloc[0]

    node_to_column = node_to_column or {}

    # --- NEW DUAL FUEL LOGIC ---
    p_elec = data_point['P_elec_optimal']
    p_gas = data_point['P_gas_optimal']
    action_desc = "doing nothing."
    action_type = "nominal"
    heating_source = None

    if p_elec < -0.1:
        action_desc = f"COOLING with the electric AC at {-p_elec:.2f} kW."
        action_type = "cooling"
    elif p_gas > 0.1 and p_elec < 0.5: # Gas is dominant
        action_desc = f"HEATING primarily with the GAS furnace at {p_gas:.2f} kW."
        action_type = "heating"
        heating_source = "gas"
    elif p_elec > 0.1: # Electric heat pump is being used (less likely in this scenario)
        action_desc = f"HEATING with the ELECTRIC heat pump at {p_elec:.2f} kW."
        action_type = "heating"
        heating_source = "electric"

    print(f"\n--- HCA Explanation for {timestamp_str} ---")
    print(f"Observed Action: Controller is {action_desc}")

    # Default explanation for economic trade-offs
    primary_reason = f"Economic Trade-off: The controller chose the most cost-effective energy source to maintain comfort, balancing electric price (${data_point['price']:.2f}/kWh) and gas price (${data_point['price_gas']:.2f}/kWh)."
    math_evidence = "No temperature constraints are significantly active."

    # HIERARCHY 1: Model-Internal Mathematics (KKT / Duals)
    dual_T_lower = data_point['dual_T_lower']
    dual_T_upper = abs(data_point['dual_T_upper'])

    if dual_T_lower > significance_threshold:
        primary_reason = f"Defensive Action: To prevent the temperature from falling below the {T_min}°C safety limit."
        math_evidence = f"The Lagrangian multiplier for the lower temperature bound is {dual_T_lower:.2f}, indicating this constraint is the primary driver."
        # HIERARCHY 2 & 3
        physical_context = query_knowledge_graph(kg, 'lower_temp_bound_active', data_point, node_to_column)
        historical_context = query_pcmci_model(pcmci_links, 'heating', current_timestamp, results_df)

    elif dual_T_upper > significance_threshold:
        primary_reason = f"Defensive Action: To prevent the temperature from exceeding the {T_max}°C safety limit."
        math_evidence = f"The Lagrangian multiplier for the upper temperature bound is {dual_T_upper:.2f}, indicating this constraint is the primary driver."
        # HIERARCHY 2 & 3
        physical_context = query_knowledge_graph(kg, 'upper_temp_bound_active', data_point, node_to_column)
        historical_context = query_pcmci_model(pcmci_links, 'cooling', current_timestamp, results_df)
    else:
        # If not defensive, it's economic. Explain the fuel choice that was actually made.
        if action_type == "heating":
            gas_price = data_point['price_gas']
            elec_price = data_point['price']
            if heating_source == "gas" and gas_price < elec_price:
                physical_context = f"The controller chose gas because its effective price (${data_point['price_gas']:.3f}/kWh) was cheaper than the electric price (${data_point['price']:.3f}/kWh) at this time."
            elif heating_source == "gas":
                # Do not claim a price advantage the data does not show.
                physical_context = f"The controller heated with gas at an effective price of ${gas_price:.3f}/kWh while the electric price was ${elec_price:.3f}/kWh at this time."
            else:
                physical_context = f"The controller heated with the electric heat pump at an electric price of ${elec_price:.3f}/kWh while the effective gas price was ${gas_price:.3f}/kWh at this time."
        else:
            physical_context = "The system is operating within its nominal comfort and physical ranges."
        historical_context = query_pcmci_model(pcmci_links, action_type, current_timestamp, results_df)

    return (f"Primary Reason: {primary_reason}\n"
            f"Mathematical Evidence: {math_evidence}\n"
            f"Physical Context: {physical_context}\n"
            f"Historical Context: {historical_context}")

def analyze_proactive_behavior(timestamp: datetime, results_df: pd.DataFrame, T_min: float, T_max: float):
    """
    Expert analyzer for proactive actions, with enhanced quantitative details.

    Pre-cooling is detected when, at ``timestamp``, electric power is below
    -0.5 kW, occupancy gain is zero, and occupancy becomes positive within the
    window from 30 minutes before to 90 minutes after ``timestamp``.

    Args:
        timestamp: Moment of the queried action.
        results_df: Time-indexed results frame.
        T_min, T_max: Comfort band quoted in the narrative.

    Returns:
        A dict with the narrative fields when pre-cooling is detected,
        otherwise ``{'error': message}`` describing the action actually observed.
    """
    try:
        start_window = timestamp - timedelta(minutes=30)
        end_window = timestamp + timedelta(minutes=90)
        window = results_df.loc[start_window:end_window]
        if window.empty:
            return {'error': f"No data available in the window around {timestamp.strftime('%H:%M')} to analyze."}

        # Most recent sample at or before the queried time
        data_at_action = results_df.loc[results_df.index.asof(timestamp)]

        is_cooling_active = data_at_action['P_elec_optimal'] < -0.5
        is_unoccupied_now = data_at_action['occupancy_gain_kw'] == 0
        is_occupied_later = window['occupancy_gain_kw'].max() > 0

        if is_cooling_active and is_unoccupied_now and is_occupied_later:
            occupancy_start_time = window[window['occupancy_gain_kw'] > 0].index[0]
            pre_cooling_minutes = (occupancy_start_time - timestamp).total_seconds() / 60

            # Quantitative temperature trend
            temp_trend_window = results_df.loc[timestamp - timedelta(hours=2):timestamp]
            temp_rise = temp_trend_window['T_outside'].iloc[-1] - temp_trend_window['T_outside'].iloc[0]

            return {
                # Quote the occupancy start found in the data rather than a fixed clock time.
                'primary_reason': f"Proactive Trade-off: Controller initiated pre-cooling {pre_cooling_minutes:.0f} minutes before the scheduled {occupancy_start_time.strftime('%H:%M')} occupancy.",
                'math_evidence': f"All temperature constraints were inactive (λ_T_max = {abs(data_at_action['dual_T_upper']):.4f} < 50.0); the decision was purely predictive, driven by the cost forecast and occupancy schedule.",
                'physical_context': f"At {timestamp.strftime('%H:%M')}, the internal air temperature was {data_at_action['T_air_internal']:.1f}°C (Comfort Band: {T_min}°C - {T_max}°C). The outside temperature had already risen by {temp_rise:.1f}°C in the preceding two hours, indicating a growing external heat load.",
                'historical_context': "This pre-emptive cooling aligns with the controller's learned historical pattern of mitigating heat gains 15-30 minutes before the workday begins.",
                'counterfactual': f"Without this early action, the model predicted the combined heat gain from 50 occupants (5 kW) and the environment would cause the internal temperature to exceed the {T_max}°C comfort limit by ~0.5°C before 09:00, requiring more expensive, reactive cooling."
            }

        # Report the action that was actually observed instead of assuming gas heating.
        p_elec = data_at_action['P_elec_optimal']
        p_gas = data_at_action['P_gas_optimal']
        t_out = data_at_action['T_outside']
        when = timestamp.strftime('%H:%M')
        if p_gas > 0.1 and p_elec >= -0.1:
            return {'error': f"No proactive cooling was detected at the queried time. At {timestamp.strftime('%H:%M')}, the observed action was gas heating at {data_at_action['P_gas_optimal']:.2f} kW, likely due to a low outside temperature of {data_at_action['T_outside']:.1f}°C."}
        if p_elec < -0.1:
            observed = f"electric cooling at {-p_elec:.2f} kW"
        elif p_elec > 0.1:
            observed = f"electric heating at {p_elec:.2f} kW"
        else:
            observed = "no significant heating or cooling"
        return {'error': f"No proactive cooling was detected at the queried time. At {when}, the observed action was {observed}, with an outside temperature of {t_out:.1f}°C."}

    except Exception as e:
        return {'error': f"An error occurred during proactive analysis: {e}"}

def analyze_system_transition(timestamp: datetime, results_df: pd.DataFrame):
    """
    Expert analyzer for coordinated changes, with quantitative before/after states.

    Compares the first and last samples of a +/-15 minute window around
    ``timestamp`` and reports an occupied -> unoccupied switch. Returns
    ``{'error': message}`` when no such switch is found or on failure.
    """
    try:
        half_width = timedelta(minutes=15)
        window = results_df.loc[timestamp - half_width:timestamp + half_width]
        if len(window) < 2:
            return {'error': 'No data for this period.'}

        data_before, data_after = window.iloc[0], window.iloc[-1]

        workday_ended = data_before['occupancy_gain_kw'] > 0 and data_after['occupancy_gain_kw'] == 0
        if not workday_ended:
            return {'error': 'No significant occupancy transition was detected at this time.'}

        evidence = {
            'Trigger Data': f"The scheduled occupancy heat gain dropped from {data_before['occupancy_gain_kw']:.1f} kW to 0 kW.",
            'Controller Response': f"The system responded immediately. Gas heating, which was off before (at {data_before['P_gas_optimal']:.2f} kW), was enabled to {data_after['P_gas_optimal']:.2f} kW to establish a stable, economical nighttime temperature."
        }
        return {
            'event_type': "Scheduled Transition to 'Unoccupied' Setback Mode",
            'trigger': "Scheduled end of workday (18:00)",
            'explanation': "The controller relaxed its temperature controls to prioritize energy savings after the building became empty.",
            'evidence': evidence,
        }
    except Exception as e:
        return {'error': f"Error during transition analysis: {e}"}

# In your main simulation script (not the visualization one)

def find_and_analyze_peak_event(date: date, results_df: pd.DataFrame, variable: str):
    """
    (Reviewer-Ready) Expert analyzer to find a peak event, with improved "not found" explanation.

    Locates the sample with the largest absolute value of ``variable`` on the
    given calendar day and describes the conditions at that moment.

    Args:
        date: Calendar day to analyse (compared against ``results_df.index.date``).
        results_df: Time-indexed results frame.
        variable: Column whose absolute peak is searched.

    Returns:
        A dict describing the peak (including 'value_numeric' and a formatted
        'value'), or ``{'error': message}`` when there is no data, the peak is
        below 1.0 in magnitude, or an exception occurs.
    """
    try:
        day_data = results_df[results_df.index.date == date]
        if day_data.empty:
            return {'error': "No data for the specified date."}

        peak_timestamp = day_data[variable].abs().idxmax()
        peak_data = day_data.loc[peak_timestamp]
        peak_value_numeric = peak_data[variable] # Get the raw number

        if abs(peak_value_numeric) < 1.0:
            avg_temp = day_data['T_outside'].mean()
            return {'error': f'No significant peak electrical demand was detected on {date}. The maximum power drawn was negligible, likely due to mild weather (average outside temperature: {avg_temp:.1f}°C) and low occupancy on that day.'}

        causes = []
        if peak_data['T_outside'] > 15:
            causes.append(f"a high outside temperature of {peak_data['T_outside']:.1f}°C")
        if peak_data['occupancy_gain_kw'] > 0:
            causes.append("heat gain from maximum occupancy")

        peak_clock = peak_timestamp.strftime('%H:%M')
        if causes:
            contributing = f"At {peak_clock}, the system was fighting against: {', '.join(causes)}."
        else:
            # Avoid the dangling "fighting against: ." when neither factor applies.
            contributing = f"At {peak_clock}, no dominant weather or occupancy driver was identified for this load."

        peak_type = "Cooling Demand" if peak_value_numeric < 0 else "Heating Demand"

        return {
            'event_type': f"Peak Electric {peak_type}",
            'timestamp': peak_timestamp,
            # Both the raw number and the formatted string are returned.
            'value_numeric': peak_value_numeric,
            'value': f"{abs(peak_value_numeric):.2f} kW",
            'explanation': "The peak electricity demand was driven by the combined thermal load from external weather and internal gains.",
            'evidence': {
                'Contributing Factors': contributing,
                'System Performance': f"The controller met this demand by running the system at {abs(peak_value_numeric):.2f} kW to maintain the internal air temperature at {peak_data['T_air_internal']:.1f}°C, successfully keeping it within the comfort band."
            }
        }
    except Exception as e:
        return {'error': f"Error during peak power analysis: {e}"}

def analyze_physical_obstacle(timestamp: datetime, results_df: pd.DataFrame):
    """
    Expert analyzer for physical inertia, with quantitative "not found" evidence.

    Examines a +/-30 minute window around ``timestamp``. The obstacle is
    reported when mean cooling power is below -1, the air temperature change is
    at least -0.2 and the wall temperature change is below -0.05.
    Every other outcome is returned as a dict containing an 'error' key.
    """
    try:
        half_hour = timedelta(minutes=30)
        window = results_df.loc[timestamp - half_hour:timestamp + half_hour]
        if len(window) < 2:
            return {'error': 'Not enough data.'}

        air = window['T_air_internal']
        air_temp_change = air.iloc[-1] - air.iloc[0]
        wall = window['T_wall_internal']
        wall_temp_change = wall.iloc[-1] - wall.iloc[0]
        power = window['P_elec_optimal']
        # Mean over the cooling samples only (negative power); NaN when there are none
        avg_cooling_power = power[power < 0].mean()

        if pd.isna(avg_cooling_power):
            return {'error': "The AC was not running during this period, so no cooling obstacle could be analyzed."}

        obstacle_present = (
            avg_cooling_power < -1
            and air_temp_change >= -0.2
            and wall_temp_change < -0.05
        )
        if not obstacle_present:
            return {
                'error': "The conditions for a significant thermal inertia obstacle were not met.",
                'evidence': f"The system's response was normal. The air temperature changed by {air_temp_change:+.2f}°C in response to an average cooling power of {avg_cooling_power:.2f} kW, which is consistent with the model's expectation."
            }

        return {
            'obstacle_name': "Thermal Inertia (The 'Brick Oven' Effect)",
            'explanation': "The air temperature was slow to drop because the building's massive walls, which were heated during the day, were radiating that stored energy back into the air, actively fighting the cooling system.",
            'evidence': {
                'Controller Action': f"The electric AC was actively removing heat from the air (average power: {avg_cooling_power:.2f} kW).",
                'Conflicting Physics': f"Despite this, the air temperature remained nearly stable (total change: {air_temp_change:+.2f}°C).",
                'Root Cause': f"The definitive proof is the wall temperature, which was measurably decreasing (by {wall_temp_change:-.3f}°C) as it released its stored heat into the air."
            }
        }
    except Exception as e:
        return {'error': f"Error during inertia analysis: {e}"}
    
def analyze_occupancy_transition(timestamp: datetime, results_df: pd.DataFrame):
    """
    Describe an occupancy switch inside a +/-1 hour window around ``timestamp``.

    The occupancy gain of the first and last samples in the window is compared:
    zero -> positive is reported as a transition to 'Occupied' mode, positive ->
    zero as a transition to 'Unoccupied' setback mode. Anything else, or any
    failure, yields ``{'error': message}``.
    """
    try:
        one_hour = timedelta(hours=1)
        window = results_df.loc[timestamp - one_hour:timestamp + one_hour]
        if window.empty:
            return {'error': 'No data for this period.'}

        gains = window['occupancy_gain_kw']
        occupancy_before = gains.iloc[0]
        occupancy_after = gains.iloc[-1]

        # Start-of-day transition
        if occupancy_before == 0 and occupancy_after > 0:
            evidence = {
                'Trigger Data': f"The internal schedule switched from unoccupied (0 kW gain) to occupied ({occupancy_after} kW gain).",
                'Controller Response': "The system likely initiated pre-cooling to absorb the anticipated heat and maintain comfort without a sudden spike in energy use."
            }
            return {
                'event_type': "Transition to 'Occupied' Mode",
                'trigger': "Scheduled start of workday (08:00)",
                'explanation': "The controller proactively adjusted its strategy to prepare for the heat load from arriving occupants.",
                'evidence': evidence,
            }

        # End-of-day transition
        if occupancy_before > 0 and occupancy_after == 0:
            evidence = {
                'Trigger Data': f"The internal schedule switched from occupied ({occupancy_before} kW gain) to unoccupied (0 kW gain).",
                'Controller Response': "The system reduced or shut off HVAC activity, allowing the temperature to drift within wider, more economical bounds."
            }
            return {
                'event_type': "Transition to 'Unoccupied' Setback Mode",
                'trigger': "Scheduled end of workday (18:00)",
                'explanation': "The controller relaxed its temperature controls to save energy now that the building is empty.",
                'evidence': evidence,
            }

        return {'error': 'No significant occupancy transition was detected at this time.'}
    except Exception as e:
        return {'error': f"Error during transition analysis: {e}"}


def analyze_thermal_inertia_obstacle(timestamp: datetime, results_df: pd.DataFrame):
    """
    Explain a slow air-temperature response in terms of thermal mass.

    Looks at a +/-30 minute window around ``timestamp`` and reports the
    obstacle when mean cooling power is below -1, the air temperature change is
    at least -0.1 and the wall temperature change is below -0.05. Otherwise
    (or on failure) ``{'error': message}`` is returned.
    """
    try:
        half_hour = timedelta(minutes=30)
        window = results_df.loc[timestamp - half_hour:timestamp + half_hour]
        if len(window) < 2:
            return {'error': 'Not enough data.'}

        air = window['T_air_internal']
        air_temp_change = air.iloc[-1] - air.iloc[0]
        wall = window['T_wall_internal']
        wall_temp_change = wall.iloc[-1] - wall.iloc[0]
        power = window['P_elec_optimal']
        # Mean over cooling samples only; NaN (no cooling) fails the comparison below
        avg_cooling_power = power[power < 0].mean()

        signature_matches = (
            avg_cooling_power < -1
            and air_temp_change >= -0.1
            and wall_temp_change < -0.05
        )
        if not signature_matches:
            return {'error': 'The conditions for a significant thermal inertia obstacle were not met.'}

        return {
            'obstacle_name': "Thermal Inertia (The 'Brick Oven' Effect)",
            'explanation': "The air temperature was slow to drop because the building's massive walls, still warm from earlier in the day, were continuously radiating heat back into the air, counteracting the cooling system.",
            'evidence': {
                'Controller Action': f"The AC was actively removing heat (avg: {avg_cooling_power:.2f} kW).",
                'Conflicting Physics': f"Despite this, the air temperature remained stable (changed by only {air_temp_change:+.2f}°C).",
                'Root Cause': f"The definitive proof is that the wall temperature was actively decreasing (by {wall_temp_change:-.3f}°C), releasing its stored energy into the air."
            }
        }
    except Exception as e:
        return {'error': f"Error during inertia analysis: {e}"}

# --- Part 8: The Publication-Ready HCA Engine and Query Router (FINAL, CORRECTED CALL) ---
def answer_query(query: str, results_df: pd.DataFrame, nmpc_solver_obj, dt_seconds: int, N: int, kg, pcmci_links, node_to_column: dict, T_min: float, T_max: float):
    """
    Route a natural-language question to the matching expert analyzer.

    The date/time is extracted with ``robust_date_parser``; the intent is then
    chosen from keywords in the query, in this order: peak demand, slow
    cooling, pre-cooling, strategy change. Any other query falls back to
    ``generate_publication_ready_hca_explanation``. Returns formatted text.
    """
    lowered = query.lower()

    # 1. Extract the target date/time
    target_datetime = robust_date_parser(query, results_df)
    if not target_datetime:
        return "I'm sorry, I couldn't understand the date and time in your question. Please be more specific."

    print(f"DEBUG (Router): Robustly parsed datetime as {target_datetime}")

    # 2. Hierarchical intent routing: the first matching intent wins
    asks_peak = "peak power" in lowered or "peak demand" in lowered
    asks_slow_cooling = "slow to cool" in lowered
    asks_precooling = "before people arrived" in lowered or "pre-cool" in lowered
    asks_strategy_change = "change in hvac strategy" in lowered or ("18:00" in query and "explain" in lowered)

    if asks_peak:
        analysis = find_and_analyze_peak_event(target_datetime.date(), results_df, 'P_elec_optimal')
        if 'error' in analysis:
            return analysis['error']
        evidence = analysis.get('evidence', {})
        return (f"**Conclusion:** The {analysis.get('event_type', 'N/A')} on {analysis.get('timestamp').date()} was at {analysis.get('timestamp').time()}, reaching {analysis.get('value', 'N/A')}.\n\n"
                f"**Explanation:** {analysis.get('explanation', 'N/A')}\n"
                f"**Evidence:**\n- {evidence.get('Contributing Factors', 'N/A')}\n- {evidence.get('System Performance', 'N/A')}")

    if asks_slow_cooling:
        analysis = analyze_physical_obstacle(target_datetime, results_df)
        if 'error' in analysis:
            # In the error case the analyzer's 'evidence' is a plain string
            evidence = analysis.get('evidence', 'No further details.')
            return f"**Conclusion:** {analysis.get('error')}\n**Evidence:** {evidence}"
        evidence = analysis.get('evidence', {})
        return (f"**Obstacle Name:** {analysis.get('obstacle_name', 'N/A')}\n\n"
                f"**Explanation:** {analysis.get('explanation', 'N/A')}\n"
                f"**Evidence:**\n- {evidence.get('Controller Action', 'N/A')}\n- {evidence.get('Conflicting Physics', 'N/A')}\n- {evidence.get('Root Cause', 'N/A')}")

    if asks_precooling:
        analysis = analyze_proactive_behavior(target_datetime, results_df, T_min, T_max)
        if 'error' in analysis:
            return analysis['error']
        return (f"**Primary Reason:** {analysis.get('primary_reason', 'N/A')}\n\n"
                f"**Mathematical Evidence:** {analysis.get('math_evidence', 'N/A')}\n\n"
                f"**Physical Context:** {analysis.get('physical_context', 'N/A')}\n\n"
                f"**Counterfactual:** {analysis.get('counterfactual', 'N/A')}")

    if asks_strategy_change:
        analysis = analyze_system_transition(target_datetime, results_df)
        if 'error' in analysis:
            return analysis['error']
        evidence = analysis.get('evidence', {})
        return (f"**Conclusion:** The controller performed a {analysis.get('event_type', 'N/A')}.\n\n"
                f"**Trigger:** {analysis.get('trigger', 'N/A')}\n"
                f"**Explanation:** {analysis.get('explanation', 'N/A')}\n"
                f"**Evidence:**\n- {evidence.get('Trigger Data', 'N/A')}\n- {evidence.get('Controller Response', 'N/A')}")

    # Fallback to the detailed point-in-time explainer
    return generate_publication_ready_hca_explanation(
        str(target_datetime), results_df, nmpc_solver_obj, dt_seconds, N, kg, pcmci_links, node_to_column, T_min, T_max
    )
    
# --- Plotting Configuration for Publication Quality ---
# Module-wide matplotlib defaults: whitegrid style with serif fonts and explicit sizes.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 12,
    'axes.titlesize': 14,
    'axes.labelsize': 12,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'figure.titlesize': 16,
})

# =============================================================================
# DEFINITIVE VISUALIZATION FUNCTIONS
# =============================================================================

def create_causal_story_plots(standard_results, defensive_results, scenarios_standard, scenario_defensive, T_min_std, T_max_std, T_min_def, T_max_def):
    """
    Master function to generate all causal story plots.

    The first three entries of ``scenarios_standard`` drive the proactive,
    peak-demand and thermal-inertia plots on ``standard_results``;
    ``scenario_defensive`` drives the defensive-control plot on
    ``defensive_results``.
    """
    print("\n" + "="*50)
    print("--- Generating Publication-Ready Causal Story Visualizations ---")

    # Apply the publication styling before any figure is created
    publication_rc = {
        'font.family': 'serif',
        'font.size': 12,
        'axes.titlesize': 14,
        'axes.labelsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'legend.fontsize': 10,
        'figure.titlesize': 16,
    }
    plt.style.use('seaborn-v0_8-whitegrid')
    plt.rcParams.update(publication_rc)

    # One plot per scenario, in fixed order
    proactive_scenario = scenarios_standard[0]
    create_proactive_analysis_story(standard_results, proactive_scenario, T_min_std, T_max_std)
    peak_scenario = scenarios_standard[1]
    create_peak_demand_story(standard_results, peak_scenario)
    inertia_scenario = scenarios_standard[2]
    create_thermal_inertia_story(standard_results, inertia_scenario)
    create_defensive_control_story(defensive_results, scenario_defensive, T_min_def, T_max_def)

    print("\n🎉 Successfully generated all causal story plots!")

def create_proactive_analysis_story(results_df, scenario, T_min, T_max):
    """
    (ENHANCED) Visualizes the HCA's trustworthy 'negative' finding for a proactive query.

    Draws a two-panel figure (occupancy forecast above, electric power below)
    for the window from 2 hours before to 2.5 hours after
    ``scenario['event_time']`` and saves it to ``scenario['output_filename']``.

    Args:
        results_df: Time-indexed results with 'occupancy_gain_kw' and
            'P_elec_optimal' columns.
        scenario: Dict providing 'event_time' and 'output_filename'.
        T_min, T_max: Accepted for interface compatibility; not used by the plot.
    """
    event_time = scenario['event_time']
    window = (event_time - timedelta(hours=2), event_time + timedelta(hours=2, minutes=30))
    data_slice = results_df.loc[window[0]:window[1]]
    if data_slice.empty:
        # Nothing to draw: the y-limit below would otherwise be computed from NaN.
        print(f"No data around {event_time}; skipping proactive analysis plot.")
        return

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True, gridspec_kw={'height_ratios': [1, 2]})
    fig.suptitle(f"MPC Causal Story: Proactive Behavior Analysis\nDriver: Anticipation of Scheduled Occupancy on {event_time.date()}", y=0.98)

    ax1.plot(data_slice.index, data_slice['occupancy_gain_kw'], label='Occupancy Heat Gain (Forecast)', color='green', linewidth=3)
    ax1.set_ylabel('Occupancy Heat Gain (kW)')
    ax1.set_title("Panel (a): Anticipation - Controller is Aware of Upcoming Occupancy at 08:00")

    ax2.plot(data_slice.index, data_slice['P_elec_optimal'], label='Electric Cooling Power', color='purple', linewidth=2)
    ax2.set_ylim(bottom=data_slice['P_elec_optimal'].min() - 0.5, top=0.5)
    ax2.set_ylabel('Cooling Power (kW)')
    ax2.set_title("Panel (b): Response - Controller Delays Action Based on Cost-Benefit Analysis")

    # Mark the first occupied sample, if the window contains one (it may not, e.g. on a weekend).
    occupied_times = data_slice.index[data_slice['occupancy_gain_kw'] > 0]
    if len(occupied_times) > 0:
        ax1.axvline(occupied_times[0], color='green', linestyle=':', linewidth=1.5, alpha=0.9)

    annotation_text = (
        "Decision Point Analysis (07:45):\n"
        "• Forecast: 5 kW heat gain starts at 08:00.\n"
        "• Current State: Temp is stable, no immediate cooling needed.\n"
        "• Economic Logic: Cost of pre-cooling now outweighs the\n"
        "  projected cost of reactive cooling later. Optimal decision is to wait."
    )
    ax2.annotate(annotation_text, xy=(event_time, 0), xytext=(event_time - timedelta(minutes=115), -2),
                 arrowprops=dict(facecolor='black', shrink=0.05, width=1, headwidth=5),
                 ha='left', fontsize=9, bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", lw=1, alpha=0.9))

    fig.text(0.5, 0.01, f"Fig. 1: At {event_time.strftime('%H:%M')}, despite predicting the 08:00 occupancy start, the MPC took no pre-cooling action. HCA correctly diagnosed this as an economic decision, as the external heat load was not yet severe enough to justify the cost.",
             ha='center', fontsize=10, style='italic', wrap=True)

    for ax in [ax1, ax2]:
        ax.axvline(event_time, color='red', linestyle='--', zorder=10)
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    plt.tight_layout(rect=[0, 0.05, 1, 0.95])
    plt.savefig(scenario['output_filename'], dpi=300)
    plt.close(fig)

def create_peak_demand_story(results_df, scenario):
    """(ENHANCED) Visualizes the peak electrical demand and its causes.

    Locates the sample with the largest absolute 'P_elec_optimal' on the
    calendar day of ``scenario['event_time']``, plots a +/-2 hour window
    around it (context panel on top, power panel below) and saves the figure
    to ``scenario['output_filename']``.

    Args:
        results_df: DataFrame indexed by timestamps; the columns
            'P_elec_optimal', 'T_outside', 'occupancy_gain_kw' and 'price'
            are read.
        scenario: dict with 'event_time' (datetime) and 'output_filename'.

    Returns:
        None. No figure is produced when the day has no rows, has no non-NaN
        power sample, or the peak magnitude is below 1.0 (plotted as kW).
    """
    day_data = results_df[results_df.index.date == scenario['event_time'].date()]
    if day_data.empty:
        return
    # Ignore NaN samples up front: with an all-NaN day idxmax has no valid
    # label to return and the .loc lookup below would fail.
    day_power = day_data['P_elec_optimal'].dropna()
    if day_power.empty:
        return
    peak_time = day_power.abs().idxmax()
    peak_value_numeric = day_data.loc[peak_time, 'P_elec_optimal']
    if abs(peak_value_numeric) < 1.0:
        return

    window = (peak_time - timedelta(hours=2), peak_time + timedelta(hours=2))
    data_slice = results_df.loc[window[0]:window[1]]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
    fig.suptitle(f"MPC Causal Story: Peak Demand Management\nDriver: Load Management on {peak_time.date()}", y=0.98)

    ax1.plot(data_slice.index, data_slice['T_outside'], label='Outside Temperature', color='C0', linewidth=2)
    ax1.set_ylabel('Temperature (°C)', color='C0'); ax1.tick_params(axis='y', labelcolor='C0')
    ax1_twin = ax1.twinx()
    ax1_twin.plot(data_slice.index, data_slice['occupancy_gain_kw'], label='Occupancy Load', color='C1', linestyle='--')
    ax1_twin.set_ylabel('Occupancy Heat Gain (kW)', color='C1'); ax1_twin.tick_params(axis='y', labelcolor='C1')
    ax1.set_title("Panel (a): Context - External & Internal Factors Creating Demand")

    # First sample inside the window at which the price rises.
    price_change_time = data_slice[data_slice['price'].diff() > 0].index
    if not price_change_time.empty:
        # Label the marker with the time it is actually drawn at rather than
        # a fixed "14:00", which is wrong whenever the window catches a
        # different price step.
        price_step_label = f"Peak Price Starts ({price_change_time[0].strftime('%H:%M')})"
        ax1.axvline(price_change_time[0], color='black', linestyle=':', label=price_step_label)
        ax1.legend()

    ax2.plot(data_slice.index, data_slice['P_elec_optimal'], label='Electric Power', color='purple', linewidth=2)
    ax2.set_ylabel('Power (kW)'); ax2.set_title("Panel (b): Peak Event - MPC Responds to Maximum Demand")
    ax2.annotate(f"Peak Cooling\n{abs(peak_value_numeric):.2f} kW", xy=(peak_time, peak_value_numeric),
                 xytext=(peak_time - timedelta(hours=1), peak_value_numeric + 1),
                 arrowprops=dict(facecolor='black', shrink=0.05, width=1, headwidth=5), ha='center')

    fig.text(0.5, 0.01, f"Fig. 2: The peak electrical demand on {peak_time.date()} occurred at {peak_time.strftime('%H:%M')}. HCA identifies the cause as high outside temperatures combined with maximum building occupancy during peak electricity prices.",
             ha='center', fontsize=10, style='italic', wrap=True)

    # Mark the peak instant on both panels and show clock time on the x axis.
    for ax in [ax1, ax2]:
        ax.axvline(peak_time, color='red', linestyle='--', zorder=10)
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    plt.tight_layout(rect=[0, 0.05, 1, 0.95])
    plt.savefig(scenario['output_filename'], dpi=300)
    plt.close(fig)

def create_thermal_inertia_story(results_df, scenario):
    """(ENHANCED) Visualizes the 'Brick Oven' effect of thermal inertia.

    Plots a +/-2 hour window around ``scenario['event_time']``: air and wall
    temperature on the top panel, electric power together with the wall-air
    temperature difference on the bottom panel, and saves the figure to
    ``scenario['output_filename']``.

    Args:
        results_df: DataFrame indexed by timestamps; the columns
            'T_air_internal', 'T_wall_internal' and 'P_elec_optimal' are read.
        scenario: dict with 'event_time' (datetime) and 'output_filename'.

    Returns:
        None. No figure is produced when the window contains no rows.
    """
    event_time = scenario['event_time']
    window = (event_time - timedelta(hours=2), event_time + timedelta(hours=2))
    # Copy because a derived column is added to the slice below.
    data_slice = results_df.loc[window[0]:window[1]].copy()
    # Same policy as create_peak_demand_story: with nothing to draw, skip
    # before a figure is created (an empty slice would otherwise crash in
    # idxmax and leave the figure open).
    if data_slice.empty:
        return

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
    fig.suptitle(f"MPC Causal Story: Thermal Inertia Obstacle\nDriver: Building Mass Effects on {event_time.date()}", y=0.98)

    ax1.plot(data_slice.index, data_slice['T_air_internal'], label='Air Temperature', color='C0', linewidth=2)
    ax1.plot(data_slice.index, data_slice['T_wall_internal'], label='Wall Temperature', color='C1', linestyle='--')
    ax1.set_ylabel('Temperature (°C)'); ax1.set_title("Panel (a): Evidence - Air Temperature Responds Slowly"); ax1.legend()

    ax2.plot(data_slice.index, data_slice['P_elec_optimal'], label='Electric Power (Cooling)', color='blue', linewidth=2)
    ax2.set_ylabel('Cooling Power (kW)', color='blue'); ax2.tick_params(axis='y', labelcolor='blue')
    ax2.legend(loc='upper left')
    ax2_twin = ax2.twinx()
    data_slice['wall_air_diff'] = data_slice['T_wall_internal'] - data_slice['T_air_internal']
    ax2_twin.plot(data_slice.index, data_slice['wall_air_diff'], label='Wall-Air Temp Difference', color='red', linestyle=':')
    ax2_twin.set_ylabel('Temp Difference (°C)', color='red'); ax2_twin.tick_params(axis='y', labelcolor='red')
    ax2_twin.legend(loc='upper right')
    ax2.set_title("Panel (b): Physical Dynamics - Warm Walls Release Stored Heat")

    # Point at the first sample with power below -0.1; the window may contain
    # none, in which case the annotation is simply omitted. The text anchors
    # (22.1 and -0.8) are fixed data coordinates.
    cooling_times = data_slice[data_slice['P_elec_optimal'] < -0.1].index
    if not cooling_times.empty:
        ax1.annotate('Cooling action begins', xy=(cooling_times[0], data_slice['T_air_internal'].max()),
                     xytext=(event_time - timedelta(minutes=45), 22.1), ha='center',
                     arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=-0.2", color='blue'))
    # idxmax needs at least one non-NaN value to return a usable timestamp.
    wall_air_diff = data_slice['wall_air_diff']
    if wall_air_diff.notna().any():
        ax2_twin.annotate('Peak Thermal Obstacle\n(Walls warmest relative to air)', xy=(wall_air_diff.idxmax(), wall_air_diff.max()),
                     xytext=(event_time - timedelta(minutes=75), -0.8), ha='center', color='red',
                     arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0.2", color='red'))

    fig.text(0.5, 0.01, "Fig. 3: HCA explains the slow cooling via Thermal Inertia. Even with active AC (blue), the air temp (blue, top) is propped up by heat from warmer walls (red dotted line > 0).",
             ha='center', fontsize=10, style='italic', wrap=True)

    # Mark the event instant on both panels and show clock time on the x axis.
    for ax in [ax1, ax2]:
        ax.axvline(event_time, color='red', linestyle='--', zorder=10)
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    plt.tight_layout(rect=[0, 0.05, 1, 0.95])
    plt.savefig(scenario['output_filename'], dpi=300)
    plt.close(fig)

def create_defensive_control_story(results_df, scenario, T_min, T_max):
    """(ENHANCED) Visualizes a defensive action with predictive trajectory and annotations.

    Plots the window from 2 hours before to 2.5 hours after
    ``scenario['event_time']``: measured and predicted air temperature against
    the lower limit on the top panel, gas power and the lower-temperature dual
    variable on the bottom panel. The figure is saved to
    ``scenario['output_filename']``.

    Args:
        results_df: DataFrame indexed by timestamps; the columns
            'T_air_internal', 'T_air_predicted', 'P_gas_optimal' and
            'dual_T_lower' are read.
        scenario: dict with 'event_time' (datetime) and 'output_filename'.
        T_min: lower temperature limit, drawn as a horizontal line and quoted
            in the caption.
        T_max: upper temperature limit; accepted but not used by this plot.

    Returns:
        None. No figure is produced when the window contains no rows.
    """
    event_time = scenario['event_time']
    window = (event_time - timedelta(hours=2), event_time + timedelta(hours=2, minutes=30))
    data_slice = results_df.loc[window[0]:window[1]]
    # Skip before creating a figure: an empty window would otherwise crash in
    # idxmax below and leave the figure open.
    if data_slice.empty:
        return

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True, gridspec_kw={'height_ratios': [2, 1]})
    fig.suptitle(f"MPC Causal Story: Defensive Temperature Control\nDriver: Active Constraint Violation Avoidance on {event_time.date()}", y=0.98)

    ax1.plot(data_slice.index, data_slice['T_air_internal'], label='Actual Air Temperature', color='C0', linewidth=2.5, zorder=10)
    ax1.axhline(T_min, color='red', linestyle='--', label=f'Lower Limit ({T_min}°C)')
    ax1.set_title("Panel (a): Evidence - Temperature Approaching Lower Comfort Limit")
    ax1.set_ylabel('Temperature (°C)')

    # --- Plot the MPC's stored prediction ---
    ax1.plot(data_slice.index, data_slice['T_air_predicted'], label='MPC 15-min Forecast', color='cyan', linestyle=':', zorder=5, marker='.')
    ax1.legend()

    ax2.plot(data_slice.index, data_slice['P_gas_optimal'], label='Gas Power', color='orange', linestyle='--')
    ax2.set_ylabel('Power (kW)', color='orange'); ax2.tick_params(axis='y', labelcolor='orange')
    ax2.legend(loc='upper left')
    ax2_twin = ax2.twinx()
    ax2_twin.plot(data_slice.index, data_slice['dual_T_lower'], label='Lower Temp Dual', color='blue', linewidth=2.5)
    ax2_twin.set_ylabel('Dual Variable (Constraint Pressure)', color='blue'); ax2_twin.tick_params(axis='y', labelcolor='blue')
    ax2_twin.legend(loc='upper right')
    ax2.set_title("Panel (b): Response - MPC Activates Heating as Constraint Becomes Active")

    # Annotate the largest dual value in the window; idxmax needs at least
    # one non-NaN sample to return a usable timestamp.
    lower_dual = data_slice['dual_T_lower']
    if lower_dual.notna().any():
        peak_dual_time = lower_dual.idxmax()
        peak_dual_value = lower_dual.max()
        ax2_twin.annotate('Constraint Becomes Active', xy=(peak_dual_time, peak_dual_value),
                          xytext=(peak_dual_time - timedelta(minutes=60), peak_dual_value * 0.6),
                          arrowprops=dict(facecolor='blue', shrink=0.05, width=1, headwidth=5), ha='center', color='blue', fontsize=9)

    fig.text(0.5, 0.01, f"Fig. 4: At {event_time.strftime('%H:%M')}, the MPC took defensive action. The high value of the Lower Temp Dual (blue) proves the action's driver was avoiding a violation of the {T_min}°C limit. The MPC's own forecast (cyan) shows it predicted this necessity.",
             ha='center', fontsize=10, style='italic', wrap=True)

    # Mark the event instant on both panels and show clock time on the x axis.
    for ax in [ax1, ax2]:
        ax.axvline(event_time, color='red', linestyle='--', zorder=10)
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    plt.tight_layout(rect=[0, 0.05, 1, 0.95])
    plt.savefig(scenario['output_filename'], dpi=300)
    plt.close(fig)


if __name__ == '__main__':
    # Folder handed to load_and_prepare_data.
    FOLDER_PATH = r"D:\PhD_clustering\MPC_electricity\dataverse_files"

    # Node name -> results column; forwarded to answer_query.
    node_to_column_map = {
        'T_air': 'T_air_internal',
        'T_wall': 'T_wall_internal',
        'Tout': 'T_outside',
        'Qrad': 'solar_rad',
        'occupancy': 'occupancy_gain_kw',
    }

    print("--- Step 1: Loading and Preparing Data (with Occupancy) ---")
    disturbance_data, _, MAX_ELEC_POWER_KW = load_and_prepare_data(FOLDER_PATH)
    # Gas power limit is set to 1.2x the electric limit.
    MAX_GAS_POWER_KW = MAX_ELEC_POWER_KW * 1.2

    # Standard Model Parameters
    DT_MINUTES = 15
    N_HORIZON_HOURS = 12
    COMFORT_TEMP_MIN = 21.0
    COMFORT_TEMP_MAX = 23.0
    COMFORT_TEMP_TARGET = 22.0
    INITIAL_T_AIR = 21.5
    INITIAL_T_WALL = 21.0
    DT_SECONDS = DT_MINUTES * 60
    # Number of DT_SECONDS steps in the N_HORIZON_HOURS horizon.
    N_PREDICTION = int(N_HORIZON_HOURS * 3600 / DT_SECONDS)

    # --- HCA Components ---
    knowledge_graph = setup_knowledge_graph()
    pcmci_model = setup_pcmci_model()

    # --- STANDARD SIMULATION ---
    print("\n--- Running Standard High-Fidelity Simulation ---")
    standard_run_csv = 'nmpc_standard_run_results.csv'
    # The solver is built even when cached results exist: answer_query needs it below.
    standard_nmpc_solver = setup_nmpc_solver(
        DT_SECONDS, N_PREDICTION, MAX_ELEC_POWER_KW, MAX_GAS_POWER_KW,
        COMFORT_TEMP_MIN, COMFORT_TEMP_MAX, COMFORT_TEMP_TARGET,
    )
    if not os.path.exists(standard_run_csv):
        print("Simulating standard scenario...")
        results_standard = run_simulation(
            standard_nmpc_solver, disturbance_data, DT_SECONDS, N_PREDICTION,
            INITIAL_T_AIR, INITIAL_T_WALL,
        )
        results_standard.to_csv(standard_run_csv)
    else:
        # Reuse the results written by a previous run.
        print(f"Loading existing standard results from {standard_run_csv}...")
        results_standard = pd.read_csv(standard_run_csv, index_col=0, parse_dates=True)

    # --- FORCED DEFENSIVE SIMULATION ---
    print("\n--- Running FORCED DEFENSIVE SCENARIO Simulation ---")
    defensive_run_csv = 'nmpc_defensive_run_results.csv'
    # Narrower temperature band and halved power limits for this scenario.
    FORCED_TEMP_MIN = 21.8
    FORCED_TEMP_MAX = 22.2
    WEAK_ELEC_POWER = MAX_ELEC_POWER_KW / 2.0
    WEAK_GAS_POWER = MAX_GAS_POWER_KW / 2.0

    defensive_nmpc_solver = setup_nmpc_solver(
        DT_SECONDS, N_PREDICTION, WEAK_ELEC_POWER, WEAK_GAS_POWER,
        FORCED_TEMP_MIN, FORCED_TEMP_MAX, COMFORT_TEMP_TARGET,
    )
    if not os.path.exists(defensive_run_csv):
        print("Simulating defensive scenario with weakened HVAC power...")
        results_defensive = run_simulation(
            defensive_nmpc_solver, disturbance_data, DT_SECONDS, N_PREDICTION,
            INITIAL_T_AIR, INITIAL_T_WALL,
        )
        results_defensive.to_csv(defensive_run_csv)
    else:
        print(f"Loading existing defensive results from {defensive_run_csv}...")
        results_defensive = pd.read_csv(defensive_run_csv, index_col=0, parse_dates=True)

    # --- GENERATE VISUALIZATIONS ---
    scenarios_standard = [
        {
            "title": "Proactive Occupancy Analysis",
            "event_time": datetime(2012, 4, 25, 7, 45),
            "driver_type": "Proactive Analysis",
            "output_filename": "mpc_story_proactive.png",
        },
        {
            "title": "Peak Demand Management",
            "event_time": datetime(2012, 4, 25, 13, 45),
            "driver_type": "Peak Demand",
            "output_filename": "mpc_story_peak_demand.png",
        },
        {
            "title": "Thermal Inertia Slow Cooling",
            "event_time": datetime(2012, 4, 18, 18, 0),
            "driver_type": "Thermal Inertia",
            "output_filename": "mpc_story_inertia.png",
        },
    ]
    scenario_defensive = {
        "title": "Defensive Temperature Control",
        "event_time": datetime(2012, 4, 4, 7, 45),
        "driver_type": "Defensive Control",
        "output_filename": "mpc_story_defensive.png",
    }

    create_causal_story_plots(
        results_standard, results_defensive, scenarios_standard, scenario_defensive,
        COMFORT_TEMP_MIN, COMFORT_TEMP_MAX, FORCED_TEMP_MIN, FORCED_TEMP_MAX,
    )

    # --- GENERATE TEXTUAL EXPLANATIONS ---
    print("\n" + "=" * 50)
    print("--- Answering PUBLICATION-READY Scenario Questions ---")

    # Each run bundles the results, the solver and the temperature limits
    # that answer_query receives for questions about that simulation.
    standard_run = (results_standard, standard_nmpc_solver, COMFORT_TEMP_MIN, COMFORT_TEMP_MAX)
    defensive_run = (results_defensive, defensive_nmpc_solver, FORCED_TEMP_MIN, FORCED_TEMP_MAX)

    # (topic shown in the banner, query text, run to answer it against)
    scenario_questions = [
        ("Proactive Behavior",
         "Why did the controller start cooling at 07:45 on April 25th, 2012, before people even arrived?",
         standard_run),
        ("System Transition",
         "Explain the coordinated change in HVAC strategy around 18:00 on April 9th, 2012.",
         standard_run),
        ("Extreme Event",
         "Find the moment of peak power demand on April 25th, 2012 and explain it.",
         standard_run),
        ("Physical Obstacle",
         "On April 18th, 2012, why was the air temperature so slow to cool down after 18:00?",
         standard_run),
        ("Defensive Action Showcase",
         "Explain the controller's action on April 4th, 2012, at 07:45. Was this a normal economic decision?",
         defensive_run),
    ]

    for question_number, (topic, query, run) in enumerate(scenario_questions, start=1):
        run_results, run_solver, temp_min, temp_max = run
        print(f"\n[Question {question_number} - {topic}]: {query}")
        explanation = answer_query(
            query, run_results, run_solver, DT_SECONDS, N_PREDICTION,
            knowledge_graph, pcmci_model, node_to_column_map, temp_min, temp_max,
        )
        print(explanation)

