import numpy as np
import pandas as pd
import networkx as nx
from pgmpy.readwrite import BIFReader
# BIF node identifier -> human-readable display name.
# fetch_hailfinder() applies this table both to the DataFrame columns and to
# the nodes of the ground-truth graph, so the two share one naming scheme.
rename_mapping_hailfinder = {
    "N0_7muVerMo":  "10.7µm Vertical Motion",
    "SubjVertMo":   "Subjective Vertical Motion",
    "QGVertMotion": "Quasi-Geostrophic Vertical Motion",
    "CombVerMo":    "Combined Vertical Motion",
    "AreaMeso_ALS": "Meso-Alpha Lift Area",
    "SatContMoist": "Satellite Moisture Contribution",
    "RaoContMoist": "Forecast-Centre Moisture Reading",
    "CombMoisture": "Combined Moisture",
    "AreaMoDryAir": "Moisture–Dry-Air Area",
    "VISCloudCov":  "Visible Cloud Cover",
    "IRCloudCover": "Infra-Red Cloud Cover",
    "CombClouds":   "Combined Cloud Cover",
    "CldShadeOth":  "Cloud Shading (Other)",
    "AMInstabMt":   "AM Instability (Mountains)",
    "InsInMt":      "Instability in Mountains",
    "WndHodograph": "Wind Hodograph",
    "OutflowFrMt":  "Mountain Outflow",
    "MorningBound": "Morning Boundaries",
    "Boundaries":   "Boundaries",
    "CldShadeConv": "Cloud Shading (Convection)",
    "CompPlFcst":   "Composite Plains Forecast",
    "CapChange":    "Capping Change",
    "LoLevMoistAd": "Low-Level Moisture Advection",
    "InsChange":    "Instability Change",
    "MountainFcst": "Mountain Forecast",
    "Date":         "Date",
    "Scenario":     "Scenario",
    "ScenRelAMCIN": "Scenario Rel. AM CIN",
    "MorningCIN":   "Morning CIN",
    "AMCINInScen":  "AM CIN in Scenario",
    "CapInScen":    "Capping in Scenario",
    "ScenRelAMIns": "Scenario Rel. AM Instability",
    "LIfr12ZDENSd": "Lifted Index DEN 12Z",
    "AMDewptCalPl": "AM Dew-Point (Plains)",
    "AMInsWliScen": "AM Instability w/in Scenario",
    "InsSclInScen": "Instability Scaling in Scenario",
    "ScenRel3_4":   "Scenario Rel. Regions 2/3/4",
    "LatestCIN":    "Latest CIN",
    "LLIW":         "LLIW Index",
    "CurPropConv":  "Current Propensity to Convection",
    "ScnRelPlFcst": "Scenario Rel. Plains Forecast",
    "PlainsFcst":   "Plains Forecast",
    "N34StarFcst":  "Regions 2/3/4 Forecast",
    "R5Fcst":       "Region 5 Forecast",
    "Dewpoints":    "Dew-Points",
    "LowLLapse":    "Low-Level Lapse Rate",
    "MeanRH":       "Mean Relative Humidity",
    "MidLLapse":    "Mid-Level Lapse Rate",
    "MvmtFeatures": "Movement Features",
    "RHRatio":      "RH Ratio",
    "SfcWndShfDis": "Surface Wind-Shift/Discontinuities",
    "SynForcng":    "Synoptic Forcing",
    "TempDis":      "Temperature Discontinuity",
    "WindAloft":    "Wind Aloft",
    "WindFieldMt":  "Wind Fields (Mountains)",
    "WindFieldPln": "Wind Fields (Plains)",
}

# Integer encoding of each variable's categorical states, keyed by display
# name.  Every variable lists its state labels in order; a label's position in
# that tuple is its integer code (first label -> 0).  Each variable gets its
# own freshly built dict.
value_mappings_hailfinder = {
    variable: {label: code for code, label in enumerate(labels)}
    for variable, labels in (
        # ───────── LIFT / VERTICAL MOTION ─────────
        ("10.7µm Vertical Motion", ("StrongUp", "WeakUp", "Neutral", "Down")),
        ("Subjective Vertical Motion", ("StrongUp", "WeakUp", "Neutral", "Down")),
        ("Quasi-Geostrophic Vertical Motion", ("StrongUp", "WeakUp", "Neutral", "Down")),
        ("Combined Vertical Motion", ("StrongUp", "WeakUp", "Neutral", "Down")),
        ("Meso-Alpha Lift Area", ("StrongUp", "WeakUp", "Neutral", "Down")),

        # ───────── MOISTURE ─────────
        ("Satellite Moisture Contribution", ("VeryWet", "Wet", "Neutral", "Dry")),
        ("Forecast-Centre Moisture Reading", ("VeryWet", "Wet", "Neutral", "Dry")),
        ("Combined Moisture", ("VeryWet", "Wet", "Neutral", "Dry")),
        ("Moisture–Dry-Air Area", ("VeryWet", "Wet", "Neutral", "Dry")),

        # ───────── CLOUD COVER & INSOLATION ─────────
        ("Visible Cloud Cover", ("Cloudy", "PC", "Clear")),
        ("Infra-Red Cloud Cover", ("Cloudy", "PC", "Clear")),
        ("Combined Cloud Cover", ("Cloudy", "PC", "Clear")),
        ("Cloud Shading (Other)", ("Cloudy", "PC", "Clear")),
        ("Cloud Shading (Convection)", ("None", "Some", "Marked")),

        # ───────── INSTABILITY & CAPPING ─────────
        ("AM Instability (Mountains)", ("None", "Weak", "Strong")),
        ("Instability in Mountains", ("None", "Weak", "Strong")),
        ("Morning CIN", ("None", "PartInhibit", "Stifling", "TotalInhibit")),
        ("Latest CIN", ("None", "PartInhibit", "Stifling", "TotalInhibit")),
        ("Capping Change", ("Decreasing", "LittleChange", "Increasing")),
        ("Low-Level Moisture Advection", ("StrongPos", "WeakPos", "Neutral", "Negative")),
        ("Instability Change", ("Decreasing", "LittleChange", "Increasing")),
        ("LLIW Index", ("Unfavorable", "Weak", "Moderate", "Strong")),
        ("Current Propensity to Convection", ("None", "Slight", "Moderate", "Strong")),

        # ───────── BOUNDARIES & MESOSCALE FEATURES ─────────
        ("Wind Hodograph", ("DCVZFavor", "StrongWest", "Westerly", "Other")),
        ("Mountain Outflow", ("None", "Weak", "Strong")),
        ("Morning Boundaries", ("None", "Weak", "Strong")),
        ("Boundaries", ("None", "Weak", "Strong")),

        # ───────── SYNOPTIC & KINEMATIC FIELDS ─────────
        ("Movement Features", ("StrongFront", "MarkedUpper", "OtherRapid", "NoMajor")),
        ("Synoptic Forcing", ("SigNegative", "NegToPos", "SigPositive", "PosToNeg", "LittleChange")),
        ("Wind Aloft", ("LV", "SWQuad", "NWQuad", "AllElse")),
        ("Wind Fields (Mountains)", ("Westerly", "LVorOther")),
        ("Wind Fields (Plains)", ("LV", "DenvCyclone", "LongAnticyc", "E_NE", "SEQuad", "WidespdDnsl")),

        # ───────── CLOUD-MODULATED STABILITY METRICS ─────────
        ("Low-Level Lapse Rate", ("CloseToDryAd", "Steep", "ModerateOrLe", "Stable")),
        ("Mid-Level Lapse Rate", ("CloseToDryAd", "Steep", "ModerateOrLe")),
        ("Mean Relative Humidity", ("VeryMoist", "Average", "Dry")),
        ("RH Ratio", ("MoistMDryL", "DryMMoistL", "Other")),

        # ───────── DISCONTINUITIES ─────────
        ("Surface Wind-Shift/Discontinuities",
         ("DenvCyclone", "E_W_N", "E_W_S", "MovingFtorOt", "DryLine", "None", "Other")),
        ("Temperature Discontinuity", ("QStationary", "Moving", "None", "Other")),

        # ───────── SCENARIOS & FORECAST PRODUCTS ─────────
        ("Composite Plains Forecast", ("IncCapDecIns", "LittleChange", "DecCapIncIns")),
        ("Date",
         ("May15_Jun14", "Jun15_Jul1", "Jul2_Jul15", "Jul16_Aug10", "Aug11_Aug20", "Aug20_Sep15")),
        ("Scenario", ("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K")),
        ("Scenario Rel. AM CIN", ("AB", "CThruK")),
        ("AM CIN in Scenario", ("LessThanAve", "Average", "MoreThanAve")),
        ("Capping in Scenario", ("LessThanAve", "Average", "MoreThanAve")),
        ("Scenario Rel. AM Instability", ("ABI", "CDEJ", "F", "G", "H", "K")),
        ("Lifted Index DEN 12Z", ("LIGt0", "N1GtLIGt_4", "N5GtLIGt_8", "LILt_8")),
        ("AM Dew-Point (Plains)", ("Instability", "Neutral", "Stability")),
        ("AM Instability w/in Scenario", ("LessUnstable", "Average", "MoreUnstable")),
        ("Instability Scaling in Scenario", ("LessUnstable", "Average", "MoreUnstable")),
        ("Scenario Rel. Regions 2/3/4", ("ACEFK", "B", "D", "GJ", "HI")),
        ("Scenario Rel. Plains Forecast", ("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K")),
        ("Mountain Forecast", ("XNIL", "SIG", "SVR")),
        ("Plains Forecast", ("XNIL", "SIG", "SVR")),
        ("Regions 2/3/4 Forecast", ("XNIL", "SIG", "SVR")),
        ("Region 5 Forecast", ("XNIL", "SIG", "SVR")),
        ("Dew-Points",
         ("LowEvrywhere", "LowAtStation", "LowSHighN", "LowNHighS", "LowMtsHighPl", "HighEvrywher", "Other")),
    )
}

# Free-text description of every variable, keyed by display name (the same
# keys as value_mappings_hailfinder).  Each description opens with the
# variable's level list ("Four-level: A / B / C / D.") followed by prose.
# Adjacent string literals are concatenated by Python, so the double spaces at
# the end of some fragments are part of the final text.
variable_description_hailfinder = {

    # ───────── LIFT / VERTICAL MOTION ─────────
    "10.7µm Vertical Motion":
        "Four-level: StrongUp / WeakUp / Neutral / Down.  Vertical-motion proxy derived from GOES-IR 10.7 µm cloud-top-cooling rates "
        "(|dT_B/dt| > 4 K h⁻¹ → StrongUp).  Rapid cooling marks newly deepening cumulonimbus towers and thus flags locations "
        "where vigorous rising parcels are already present.",

    "Subjective Vertical Motion":
        "Four-level: StrongUp / WeakUp / Neutral / Down.  Vertical-motion estimate based on forecaster interpretation of satellite loops, "
        "surface charts, and profiler data.  Human pattern recognition can detect subtle, mesoscale ascent signatures—such as gravity-wave "
        "packets—that automated algorithms often overlook.",

    "Quasi-Geostrophic Vertical Motion":
        "Four-level: StrongUp / WeakUp / Neutral / Down.  Large-scale ω-diagnostic from quasi-geostrophic forcing terms in the 06 Z Eta run.  "
        "Positive contributions from differential cyclonic vorticity advection and warm-advection lift accumulate into the StrongUp bin, "
        "summarising synoptic-scale ascent.",

    "Combined Vertical Motion":
        "Four-level: StrongUp / WeakUp / Neutral / Down.  Composite vertical-motion indicator fusing the satellite, subjective, and Q-G fields.  "
        "Because each input emphasizes different spatial and temporal scales, this provides a balanced ‘is air rising?’ signal for the network.",

    "Meso-Alpha Lift Area":
        "Four-level: StrongUp / WeakUp / Neutral / Down.  Mesoscale-lift coverage score—fraction of the 200–500 km domain east of the Front Range "
        "with upward ω at 700 hPa.  A broad StrongUp footprint implies region-wide ascent supportive of organized convective episodes.",


    # ───────── MOISTURE ─────────
    "Satellite Moisture Contribution":
        "Four-level: VeryWet / Wet / Neutral / Dry.  Precipitable-water anomaly from GOES sounder retrievals, referenced to a late-spring "
        "climatology.  VeryWet (PW ≥ +1 σ) signals an unusually deep low-level moisture reservoir.",

    "Forecast-Centre Moisture Reading":
        "Four-level: VeryWet / Wet / Neutral / Dry.  Model total-column water-vapor at the DEN grid point in the 12 Z Eta forecast.  "
        "Captures sub-visible moisture plumes beneath upper-level cloud decks.",

    "Combined Moisture":
        "Four-level: VeryWet / Wet / Neutral / Dry.  Simple average of satellite and model PW anomalies.  By merging independent observations, "
        "this yields a steadier picture of boundary-layer humidity feeding developing storms.",

    "Moisture–Dry-Air Area":
        "Four-level: VeryWet / Wet / Neutral / Dry.  Areal extent of the moist sector east of the dryline, defined by 700 hPa mixing-ratio gradients.  "
        "A larger VeryWet area implies deeper inflow corridors where convection can sustain longer life cycles.",


    # ───────── CLOUD COVER & INSOLATION ─────────
    "Visible Cloud Cover":
        "Three-level: Cloudy / PC / Clear.  Cloud-cover fraction from the 0.65 µm GOES visible channel at 15 Z.  Overcast conditions limit solar heating, "
        "whereas Clear skies favor stronger boundary-layer warming and greater instability.",

    "Infra-Red Cloud Cover":
        "Three-level: Cloudy / PC / Clear.  Cloud-cover fraction from the 10.7 µm IR channel, which is insensitive to sun angle and better at detecting "
        "thick or high decks before sunrise.",

    "Combined Cloud Cover":
        "Three-level: Cloudy / PC / Clear.  Union of visible and IR masks to estimate net short-wave radiation reaching the surface—a first-order control "
        "on daytime destabilization.",

    "Cloud Shading (Other)":
        "Three-level: Cloudy / PC / Clear.  Flag for mid-level or anvil shading not tied to local convection.  Persistent shading can erode the mountain-plains "
        "temperature gradient crucial for upslope initiation.",

    "Cloud Shading (Convection)":
        "Three-level: None / Some / Marked.  Flag for shading by anvils from ongoing storms west or southwest of the plains.  Marked shading often delays "
        "additional surface-based convection until the debris moves east.",


    # ───────── INSTABILITY & CAPPING ─────────
    "AM Instability (Mountains)":
        "Three-level: None / Weak / Strong.  Morning (12 Z) CAPE at the Front Range crest from the RAP sounding.  Strong (> 500 J kg⁻¹) suggests early "
        "orographic storms that may propagate onto the plains.",

    "Instability in Mountains":
        "Three-level: None / Weak / Strong.  Mid-morning (15 Z) CAPE update using HRRR soundings blended with mesonet data.  Captures rapid "
        "diurnal warming altering the early-day stability profile.",

    "Capping Change":
        "Three-level: Decreasing / LittleChange / Increasing.  Trend in 700 hPa temperature above Denver between 12 Z and 18 Z.  Cooling aloft weakens "
        "the elevated mixed-layer cap, while warming strengthens it.",

    "Low-Level Moisture Advection":
        "Four-level: StrongPos / WeakPos / Neutral / Negative.  0–3 km mixing-ratio tendency from 06 Z to 18 Z in HRRR.  StrongPos entries indicate "
        "substantial boundary-layer moistening through the day.",

    "Instability Change":
        "Three-level: Decreasing / LittleChange / Increasing.  Morning trend in 0–6 km mean CAPE over the plains.  An Increasing tag (> +250 J kg⁻¹) "
        "highlights rapidly expanding energy for updrafts.",

    "Morning CIN":
        "Four-level: None / PartInhibit / Stifling / TotalInhibit.  Pre-sunrise convective inhibition at Denver (12 Z).  Higher classes denote stronger "
        "suppression of surface parcel ascent.",

    "Latest CIN":
        "Four-level: None / PartInhibit / Stifling / TotalInhibit.  Near-real-time CIN from the most recent special sounding or RUC analysis within two hours of forecast.",

    "LLIW Index":
        "Four-level: Unfavorable / Weak / Moderate / Strong.  Composite of 0–3 km CAPE, storm-relative helicity, and shear.  Higher values summarize "
        "environments favorable for rotating updrafts.",

    "Current Propensity to Convection":
        "Four-level: None / Slight / Moderate / Strong.  Rules-based initiation score blending CIN, CAPE, moisture trends, and observed towering cumulus.  "
        "Serves as the network’s trigger gauge for imminent storm development.",


    # ───────── BOUNDARIES & MESOSCALE FEATURES ─────────
    "Wind Hodograph":
        "Four-level: DCVZFavor / StrongWest / Westerly / Other.  Shape category of the 0–6 km hodograph from the Denver profiler.  DCVZFavor shows "
        "easterly backing with height—a classic Denver Convergence-Vorticity Zone signature.",

    "Mountain Outflow":
        "Three-level: None / Weak / Strong.  Strength of morning katabatic drainage off the Front Range from surface observations.  Strong outflow "
        "reinforces easterly upslope and sharpens low-level convergence.",

    "Morning Boundaries":
        "Three-level: None / Weak / Strong.  Presence and intensity of residual gust fronts, outflow pools, or stationary fronts at 12 Z.  "
        "Their strength guides where mesoscale lift may focus later.",

    "Boundaries":
        "Three-level: None / Weak / Strong.  Mid-afternoon (18 Z) update of boundary strength.  Intact, strong boundaries often act as foci for discrete supercells.",


    # ───────── SYNOPTIC & KINEMATIC FIELDS ─────────
    "Movement Features":
        "Four-level: StrongFront / MarkedUpper / OtherRapid / NoMajor.  Flag for the day’s most mobile synoptic feature—surface front, mid-level short wave, or none.  "
        "Rapid systems can quickly reshape stability and shear.",

    "Synoptic Forcing":
        "Five-level: SigNegative / NegToPos / SigPositive / PosToNeg / LittleChange.  Category of 500 hPa height tendencies over Colorado.  Significant height falls "
        "(SigNegative) denote large-scale ascent; rises imply subsidence.",

    "Wind Aloft":
        "Four-level: LV / SWQuad / NWQuad / AllElse.  500 hPa flow quadrant relative to a Four-Corners low.  Each quadrant conveys distinct shear and moisture transport.",

    "Wind Fields (Mountains)":
        "Two-level: Westerly / LVorOther.  700 hPa wind pattern immediately west of the Divide.  Westerly flow often brings a strong EML cap; LV allows deeper moisture aloft.",

    "Wind Fields (Plains)":
        "Six-level: LV / DenvCyclone / LongAnticyc / E_NE / SEQuad / WidespdDnsl.  850 hPa wind-pattern category across the plains.  SEQuad feeds Gulf moisture upslope; "
        "DenvCyclone captures the local Denver Cyclone circulation.",


    # ───────── CLOUD-MODULATED STABILITY METRICS ─────────
    "Low-Level Lapse Rate":
        "Four-level: CloseToDryAd / Steep / ModerateOrLe / Stable.  Surface-to-700 hPa lapse-rate descriptor from morning soundings.  "
        "CloseToDryAd (~9.8 K km⁻¹) suggests parcels stay buoyant; Steep indicates enhanced instability; Stable denotes weak lapse.",

    "Mid-Level Lapse Rate":
        "Three-level: CloseToDryAd / Steep / ModerateOrLe.  700–500 hPa lapse-rate descriptor.  Steep values (> 7 K km⁻¹) typify an Elevated Mixed Layer, "
        "adding CAPE above the boundary layer.",

    "Mean Relative Humidity":
        "Three-level: VeryMoist / Average / Dry.  Column-mean RH (surface to 500 hPa) from model analysis at 12 Z or 18 Z.  VeryMoist (> 70 %) supports storm longevity; "
        "Dry (< 40 %) can trigger evaporational cooling and stronger outflows.",

    "RH Ratio":
        "Three-level: MoistMDryL / DryMMoistL / Other.  Mid-vs-low RH contrast.  Moist aloft over a dry boundary layer (MoistMDryL) hints at elevated storms; "
        "the reverse supports classic surface-based hailers.",


    # ───────── DISCONTINUITIES ─────────
    "Surface Wind-Shift/Discontinuities":
        "Seven-level: DenvCyclone / E_W_N / E_W_S / MovingFtorOt / DryLine / None / Other.  Surface-boundary type/age code covering Denver Cyclone, drylines, "
        "outflow boundaries, Pacific fronts, and more—key convergence and gradient markers.",

    "Temperature Discontinuity":
        "Four-level: QStationary / Moving / None / Other.  Surface baroclinic-zone strength and motion.  A Moving tag often captures a Pacific front racing east.",


    # ───────── SCENARIOS & FORECAST PRODUCTS ─────────
    "Composite Plains Forecast":
        "Three-level: IncCapDecIns / LittleChange / DecCapIncIns.  Expert judgment on how plains capping (Cap) vs. instability (Ins) will evolve through 00 Z.",

    "Date":
        "Six-level: May15_Jun14 / Jun15_Jul1 / Jul2_Jul15 / Jul16_Aug10 / Aug11_Aug20 / Aug20_Sep15.  Coarse calendar bins to supply climatological priors.",

    "Scenario":
        "Eleven-level: A / B / C / D / E / F / G / H / I / J / K.  Synoptic-mesoscale pattern code distilled from 15 years of cases (e.g., A = post-frontal upslope).",

    "Scenario Rel. AM CIN":
        "Two-level: AB / CThruK.  Flags whether the scenario falls into low-CIN (A–B) or higher-CIN (C–K) groups for weighting the morning sounding.",

    "AM CIN in Scenario":
        "Three-level: LessThanAve / Average / MoreThanAve.  Observed 12 Z CIN compared to the scenario’s historical median.",

    "Capping in Scenario":
        "Three-level: LessThanAve / Average / MoreThanAve.  Observed 700 hPa cap strength compared to the scenario’s average.",

    "Scenario Rel. AM Instability":
        "Six-level: ABI / CDEJ / F / G / H / K.  Groups scenarios by their climatological morning CAPE distribution, helping gauge how unusual a sounding is.",

    "Lifted Index DEN 12Z":
        "Four-level: LIGt0 / N1GtLIGt_4 / N5GtLIGt_8 / LILt_8.  Bucketed 12 Z Lifted Index at Denver: > 0 K, −1 to −4 K, −5 to −8 K, < −8 K.",

    "AM Dew-Point (Plains)":
        "Three-level: Instability / Neutral / Stability.  Morning dew-point anomaly along I-25 vs. 30-year mean; positive anomalies boost CAPE.",

    "AM Instability w/in Scenario":
        "Three-level: LessUnstable / Average / MoreUnstable.  Morning CAPE percentile normalized by the scenario’s own climatology.",

    "Instability Scaling in Scenario":
        "Three-level: LessUnstable / Average / MoreUnstable.  Legacy CAPE-scaling variant retained for backward-compatibility testing.",

    "Scenario Rel. Regions 2/3/4":
        "Five-level: ACEFK / B / D / GJ / HI.  Maps each scenario to the primary NWS-Boulder forecast zones east of the Rockies.",

    "Scenario Rel. Plains Forecast":
        "Eleven-level: A / B / C / D / E / F / G / H / I / J / K.  Ties the current synoptic scenario to a baseline climatological plains forecast.",

    "Mountain Forecast":
        "Three-level: XNIL / SIG / SVR.  Categorical hail forecast west of I-25: no storms (XNIL), significant but sub-severe (SIG), or severe (SVR).",

    "Plains Forecast":
        "Three-level: XNIL / SIG / SVR.  Same categories for Region 1 (the Colorado plains).",

    "Regions 2/3/4 Forecast":
        "Three-level: XNIL / SIG / SVR.  Hail outlook for the adjoining NE-CO/WY/KS zones.",

    "Region 5 Forecast":
        "Three-level: XNIL / SIG / SVR.  Forecast for far-eastern Colorado and western Kansas fringe.",


    # ───────── DAMAGE-RELEVANT MOISTURE / TEMPERATURE FIELDS ─────────
    # NOTE(review): the level names listed below read as spatial patterns
    # (low/high by location), while the prose describes 2 K dew-point buckets
    # at a single station.  The two do not obviously agree — confirm which is
    # intended before relying on this description.
    "Dew-Points":
        "Seven-level: LowEvrywhere / LowAtStation / LowSHighN / LowNHighS / LowMtsHighPl / HighEvrywher / Other.  "
        "18 Z surface dew-point at Denver bucketed in 2 K increments (< 0 °C up to ≥ 20 °C).  Higher bins denote richer boundary-layer moisture."
}

# One-sentence, human-readable summary of what the Hailfinder dataset is for.
dataset_description_hailfinder = "The Hailfinder dataset was designed to forecast severe summer hail in northeastern Colorado."

# CSV column spelling -> BIF node identifier, used to rename the raw CSV
# header.  In this table every CSV spelling is the BIF identifier with its
# underscores removed (only three names actually differ), so the BIF
# identifiers are listed once, in order, and the CSV keys are derived.
hail_df_mapping = {
    bif_name.replace("_", ""): bif_name
    for bif_name in (
        "N0_7muVerMo",
        "SubjVertMo",
        "QGVertMotion",
        "CombVerMo",
        "AreaMeso_ALS",
        "SatContMoist",
        "RaoContMoist",
        "CombMoisture",
        "AreaMoDryAir",
        "VISCloudCov",
        "IRCloudCover",
        "CombClouds",
        "CldShadeOth",
        "AMInstabMt",
        "InsInMt",
        "WndHodograph",
        "OutflowFrMt",
        "MorningBound",
        "Boundaries",
        "CldShadeConv",
        "CompPlFcst",
        "CapChange",
        "LoLevMoistAd",
        "InsChange",
        "MountainFcst",
        "Date",
        "Scenario",
        "ScenRelAMCIN",
        "MorningCIN",
        "AMCINInScen",
        "CapInScen",
        "ScenRelAMIns",
        "LIfr12ZDENSd",
        "AMDewptCalPl",
        "AMInsWliScen",
        "InsSclInScen",
        "ScenRel3_4",
        "LatestCIN",
        "LLIW",
        "CurPropConv",
        "ScnRelPlFcst",
        "PlainsFcst",
        "N34StarFcst",
        "R5Fcst",
        "Dewpoints",
        "LowLLapse",
        "MeanRH",
        "MidLLapse",
        "MvmtFeatures",
        "RHRatio",
        "SfcWndShfDis",
        "SynForcng",
        "TempDis",
        "WindAloft",
        "WindFieldMt",
        "WindFieldPln",
    )
}

def fetch_hailfinder(data_dir='/net/dali/home/mscbio/rul98/CausalLLM/data', layout_seed=None):
    """Load the Hailfinder samples and the ground-truth network structure.

    Reads ``hailfinder.csv`` and ``hailfinder.bif`` from ``data_dir``, renames
    the CSV columns to their display names, integer-encodes every column that
    has an entry in ``value_mappings_hailfinder`` and builds a directed graph
    from the BIF model whose nodes carry the same display names.

    Args:
        data_dir: Directory containing ``hailfinder.csv`` and
            ``hailfinder.bif``.  The default is the location this loader was
            originally hard-wired to, so existing zero-argument calls behave
            as before.
        layout_seed: Passed to ``networkx.spring_layout`` as ``seed``.
            ``None`` (default) leaves the layout unseeded, as before; pass an
            int to get reproducible node positions.

    Returns:
        tuple: ``(df, GroundTruth, pos_data)`` where ``df`` is the encoded
        ``pandas.DataFrame`` (mapped columns use the nullable ``Int64``
        dtype), ``GroundTruth`` is the ``networkx.DiGraph`` of the BIF model
        relabelled with display names, and ``pos_data`` is the result of
        ``networkx.spring_layout`` for that graph.

    Warns:
        UserWarning: If a mapped column contains a label that has no integer
            code in ``value_mappings_hailfinder``; such cells become ``<NA>``.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import warnings

    csv_path = os.path.join(data_dir, 'hailfinder.csv')
    bif_path = os.path.join(data_dir, 'hailfinder.bif')

    df = pd.read_csv(csv_path)

    # Normalize all missing values (including the strings "<NA>" and "nan")
    # to the literal label "None".
    # NOTE(review): pandas' default NA parsing appears to turn a literal
    # "None" cell into NaN on read, so this step presumably also restores
    # genuine "None" states — confirm against the CSV.
    df = df.replace(["<NA>", "nan", pd.NA], "None")

    # CSV spelling -> BIF identifier -> display name.
    df = df.rename(columns=hail_df_mapping)
    df = df.rename(columns=rename_mapping_hailfinder)

    # Integer-encode every column that has a value mapping.
    for col, mapping in value_mappings_hailfinder.items():
        if col not in df.columns:
            continue
        labels = df[col].astype("str")
        # Labels without a code are coerced to <NA> by .map(); surface them
        # instead of losing them silently.  "None" is excluded because it is
        # the missing-value placeholder written above, so it is expected to
        # be unmapped in variables that have no "None" state.
        unmapped = sorted(set(labels.unique()) - set(mapping) - {"None"})
        if unmapped:
            warnings.warn(
                f"Column {col!r} contains labels with no integer code; "
                f"they are encoded as <NA>: {unmapped}",
                stacklevel=2,
            )
        df[col] = labels.map(mapping).astype("Int64")

    reader = BIFReader(bif_path)
    G_model = reader.get_model()

    # Copy the model's nodes and edges into a plain directed graph, then give
    # the nodes the same display names as the DataFrame columns.
    GroundTruth = nx.DiGraph()
    GroundTruth.add_nodes_from(G_model.nodes())
    GroundTruth.add_edges_from(G_model.edges())
    GroundTruth = nx.relabel_nodes(GroundTruth, rename_mapping_hailfinder)
    pos_data = nx.spring_layout(GroundTruth, seed=layout_seed)
    return df, GroundTruth, pos_data