import numpy as np
import pandas as pd
import networkx as nx
from pgmpy.readwrite import BIFReader
# Raw HEPAR II node/column identifier -> human-readable display label.
# fetch_hepar2() applies this mapping both to the DataFrame columns and to the
# nodes of the ground-truth graph, so each label must stay unique.
# NOTE(review): "vh_amn" may denote viral hepatitis in anamnesis (i.e. history)
# rather than virus types A/M/N - confirm against the HEPAR II documentation.
rename_mapping_hepar2 = {
    "alcoholism": "Alcoholism (risk)",
    "vh_amn": "Viral Hepatitis (A/M/N)",
    "hepatotoxic": "Hepatotoxic Exposure",
    "THepatitis": "Toxic Hepatitis",
    "hospital": "Hospitalisation",
    "surgery": "Surgery",
    "gallstones": "Gallstones",
    "choledocholithotomy": "Choledocholithotomy",
    "injections": "Therapeutic or Diagnostic Injections",
    "transfusion": "Blood Transfusion",
    "ChHepatitis": "Chronic Hepatitis Status",
    "sex": "Sex",
    "age": "Age Group",
    "PBC": "Primary Biliary Cholangitis (risk)",
    "fibrosis": "Fibrosis",
    "diabetes": "Diabetes",
    "obesity": "Obesity",
    "Steatosis": "Steatosis (Fatty Liver)",
    "Cirrhosis": "Cirrhosis Status",
    "Hyperbilirubinemia": "Hyper-bilirubinaemia",
    "triglycerides": "Triglycerides",
    "RHepatitis": "Recurrent Hepatitis",
    "fatigue": "Fatigue",
    "bilirubin": "Bilirubin",
    "itching": "Pruritus (Itching)",
    "upper_pain": "Upper Abdominal Pain",
    "fat": "Fat-related Finding",
    "pain_ruq": "Pain in Right Upper Quadrant",
    "pressure_ruq": "Pressure in Right Upper Quadrant",
    "phosphatase": "Alkaline Phosphatase",
    "skin": "Skin Symptoms",
    "ama": "Anti-mitochondrial Antibodies",
    "le_cells": "LE Cells (autoimmune sign)",
    "joints": "Joint Symptoms",
    "pain": "General Pain",
    "proteins": "Protein Level",
    "edema": "Edema",
    "platelet": "Platelet Count",
    "inr": "INR (Coagulation abnormality)",
    "bleeding": "Bleeding",
    "flatulence": "Flatulence",
    "alcohol": "Current Alcohol Use",
    "encephalopathy": "Encephalopathy",
    "urea": "Urea",
    "ascites": "Ascites",
    "hepatomegaly": "Hepatomegaly",
    "hepatalgia": "Hepatalgia (Liver Pain)",
    "density": "Abnormal Liver Density",
    "ESR": "ESR (Inflammation marker)",
    "alt": "ALT (Liver enzyme)",
    "ast": "AST (Liver enzyme)",
    "amylase": "Amylase (Pancreatic/liver enzyme)",
    "ggtp": "GGT (Liver enzyme)",
    "cholesterol": "Cholesterol Level",
    "hbsag": "Hepatitis B Surface Antigen",
    "hbsag_anti": "Hepatitis B Surface Antibody (anti-HBs)",
    "anorexia": "Anorexia",
    "nausea": "Nausea",
    "spleen": "Splenomegaly",
    "consciousness": "Altered Consciousness",
    "spiders": "Spider Angioma",
    "jaundice": "Jaundice",
    "albumin": "Albumin",
    "edge": "Liver Edge Abnormality",
    "irregular_liver": "Irregular Liver Surface",
    "hbc_anti": "Hepatitis B Core Antibody",
    "hcv_anti": "Hepatitis C Antibody",
    "palms": "Palmar Erythema",
    "hbeag": "Hepatitis B e Antigen",
    "carcinoma": "Carcinoma",
}

# Free-text clinical description for each variable, keyed by the display
# labels that rename_mapping_hepar2 assigns (one entry per label).
variable_description_hepar2 = {
    "Alcoholism (risk)": "History of alcoholism (history of harmful drinking), a major risk factor for chronic liver disease and cirrhosis. Presence increases likelihood of hepatic injury.",
    "Viral Hepatitis (A/M/N)": "Infection with hepatitis viruses A, M, or N (viral hepatitis), which can cause acute or chronic liver inflammation.",
    "Hepatotoxic Exposure": "Exposure to substances toxic to the liver (drugs, chemicals) that can cause liver injury.",
    "Toxic Hepatitis": "Liver inflammation due to toxic insults (e.g., drugs, toxins).",
    "Hospitalisation": "Previous hospitalization, indicating severity or complications requiring inpatient care.",
    "Surgery": "Recent surgical history, which may relate to risk factors or precipitating events.",
    "Gallstones": "Presence of gallstones, a potential cause of biliary obstruction and liver-related symptoms.",
    "Choledocholithotomy": "Surgical removal of common bile duct stones, indicative of prior biliary tract disease.",
    "Therapeutic or Diagnostic Injections": "Therapeutic or diagnostic injections (potential blood-borne exposure) increasing risk for hepatitis.",
    "Blood Transfusion": "History of blood transfusion, a risk factor for transmissible liver infections (e.g., hepatitis B/C).",
    "Chronic Hepatitis Status": "Status of chronic hepatitis: active, persistent, or absent; reflects ongoing liver inflammation.",
    "Sex": "Biological sex; some liver conditions have sex-based prevalence differences.",
    "Age Group": "Categorized age (older age often increases risk or modifies disease presentation).",
    "Primary Biliary Cholangitis (risk)": "Primary biliary cholangitis is an auto-immune disease that is ~9 × more common in middle-aged females than males and is rare before age 30.",
    "Fibrosis": "Fibrotic scarring of liver tissue, representing chronic injury progression.",
    "Diabetes": "Presence of diabetes mellitus, a metabolic comorbidity that affects liver disease progression.",
    "Obesity": "Obesity, a risk factor for nonalcoholic fatty liver disease and steatosis.",
    "Steatosis (Fatty Liver)": "Fat accumulation in hepatocytes, often an early manifestation of metabolic liver injury.",
    "Cirrhosis Status": "Liver structural damage categorized as absent, compensated, or decompensated cirrhosis.",
    "Hyper-bilirubinaemia": "Elevated bilirubin in blood, reflecting impaired bilirubin handling by the liver (jaundice).",
    "Triglycerides": "Blood triglyceride levels, reflecting lipid metabolism abnormalities.",
    "Recurrent Hepatitis": "Repeated episodes of hepatitis, contributing to chronic liver stress.",
    "Fatigue": "General fatigue, a common symptom in liver dysfunction.",
    "Bilirubin": "Level of bilirubin, an indicator of liver excretory function; higher values reflect worsening cholestasis/jaundice.",
    "Pruritus (Itching)": "Itching, often a sign of cholestasis or biliary obstruction.",
    "Upper Abdominal Pain": "Pain in upper abdomen, can be due to hepatic or biliary pathology.",
    "Fat-related Finding": "Clinical finding related to fat deposition or metabolism abnormalities in liver.",
    "Pain in Right Upper Quadrant": "Localized liver-area pain, often from hepatic inflammation or biliary disease.",
    "Pressure in Right Upper Quadrant": "Sensation of pressure over liver area, sign of organ enlargement or inflammation.",
    "Alkaline Phosphatase": "Cholestatic liver enzyme; elevations may signal biliary obstruction or injury.",
    "Skin Symptoms": "Dermatologic manifestations related to liver disease (e.g., jaundice, spider angiomas).",
    "Anti-mitochondrial Antibodies": "Autoantibodies strongly associated with primary biliary cholangitis (autoimmune etiology).",
    "LE Cells (autoimmune sign)": "Indicator of autoimmune activity, may be present in autoimmune liver disorders.",
    "Joint Symptoms": "Arthralgias or joint complaints, which can accompany autoimmune liver disease.",
    "General Pain": "Nonspecific pain which may reflect systemic or hepatic pathology.",
    "Protein Level": "Serum protein levels, e.g., albumin/globulins, reflect synthetic function of liver.",
    "Edema": "Fluid accumulation, often due to hypoalbuminemia or portal hypertension in liver disease.",
    "Platelet Count": "Thrombocytopenia may reflect portal hypertension or hypersplenism in chronic liver disease.",
    "INR (Coagulation abnormality)": "Impaired clotting cascade function; prolonged INR suggests synthetic liver dysfunction.",
    "Bleeding": "Clinical bleeding tendency, consequence of coagulopathy or portal hypertension.",
    "Flatulence": "Gastrointestinal symptom; may occur in hepatic dysfunction contexts.",
    "Current Alcohol Use": "HEPAR II questionnaire: “Do you presently consume alcohol? (yes/no)”. In many end-stage cirrhotics alcohol intake had already stopped.",
    "Encephalopathy": "Cognitive alteration from liver failure (hepatic encephalopathy).",
    "Urea": "Blood urea reflects metabolic balance; may be altered in liver dysfunction or related comorbidities.",
    "Ascites": "Abdominal fluid accumulation secondary to portal hypertension or hypoalbuminemia.",
    "Hepatomegaly": "Enlarged liver, can signify inflammation, congestion, or infiltration.",
    "Hepatalgia (Liver Pain)": "Pain localized to liver, indicating hepatic inflammation or capsular stretch.",
    "Abnormal Liver Density": "Imaging finding suggestive of structural changes in liver tissue.",
    "ESR (Inflammation marker)": "Erythrocyte sedimentation rate, nonspecific marker of systemic inflammation.",
    "ALT (Liver enzyme)": "Alanine aminotransferase elevation indicates hepatocellular injury.",
    "AST (Liver enzyme)": "Aspartate aminotransferase elevation indicates hepatocellular or muscular injury.",
    "Amylase (Pancreatic/liver enzyme)": "Elevation may reflect pancreatic involvement or hepatic stress.",
    "GGT (Liver enzyme)": "Gamma-glutamyl transferase; elevated in cholestasis and alcohol-related liver injury.",
    "Cholesterol Level": "Lipid profile; low levels may reflect synthetic failure, high may reflect metabolic dysregulation.",
    "Hepatitis B Surface Antigen": "Marker of active hepatitis B infection.",
    "Hepatitis B Surface Antibody (anti-HBs)": "Marker of immunity or recovery from hepatitis B; presence usually protective.",
    "Anorexia": "Loss of appetite, common in systemic or hepatic illness.",
    "Nausea": "Gastrointestinal discomfort associated with liver disorders.",
    "Splenomegaly": "Enlarged spleen typically from portal hypertension in chronic liver disease.",
    "Altered Consciousness": "Changes in mental status, often due to hepatic encephalopathy.",
    "Spider Angioma": "Vascular skin lesion indicative of chronic liver disease and hyperestrogenism.",
    "Jaundice": "Yellowish discoloration of skin/eyes from hyperbilirubinemia.",
    "Albumin": "Serum albumin; low levels signify impaired synthetic liver function.",
    "Liver Edge Abnormality": "Palpable irregularity of liver edge on exam, sign of chronic structural disease.",
    "Irregular Liver Surface": "Surface irregularity suggesting cirrhosis or fibrosis.",
    "Hepatitis B Core Antibody": "Indicates past or ongoing hepatitis B infection (core exposure).",
    "Hepatitis C Antibody": "Marker of exposure to hepatitis C virus (may indicate chronic infection).",
    "Palmar Erythema": "Reddening of palms associated with chronic liver disease.",
    "Hepatitis B e Antigen": "Marker of active viral replication and high infectivity in hepatitis B.",
    "Carcinoma": "Presence of hepatocellular carcinoma or liver malignancy.",
}

# Value mappings: adverse=1, normal/benign=0; for multi-level labs: ordinal severity (higher → worse).
#
# Keys MUST be the display labels produced by rename_mapping_hepar2:
# fetch_hepar2() only encodes a column when the key is found among the renamed
# DataFrame columns, so a key that does not match is skipped and that column
# keeps its raw string states.
value_mappings_hepar2 = {
    # Binary risk/exposure/presence (1 adverse)
    'Alcoholism (risk)': {'present': 1, 'absent': 0},
    'Viral Hepatitis (A/M/N)': {'present': 1, 'absent': 0},
    'Hepatotoxic Exposure': {'present': 1, 'absent': 0},
    'Toxic Hepatitis': {'present': 1, 'absent': 0},
    'Hospitalisation': {'present': 1, 'absent': 0},
    'Surgery': {'present': 1, 'absent': 0},
    'Gallstones': {'present': 1, 'absent': 0},
    'Choledocholithotomy': {'present': 1, 'absent': 0},
    # Key fixed: was 'Injections', which matches no renamed column.
    'Therapeutic or Diagnostic Injections': {'present': 1, 'absent': 0},
    'Blood Transfusion': {'present': 1, 'absent': 0},
    # Both non-absent states collapse to 1 (any chronic hepatitis counts as adverse).
    'Chronic Hepatitis Status': {'active': 1, 'persistent': 1, 'absent': 0},
    'Sex': {'female': 0, 'male': 1},  # assuming higher risk/prevalence in males (contextual)
    'Age Group': {'age0_30': 0, 'age31_50': 1, 'age51_65': 2, 'age65_100': 3},  # increasing risk with age
    # Key fixed: was 'Primary Biliary Cirrhosis', which matches no renamed column.
    'Primary Biliary Cholangitis (risk)': {'present': 1, 'absent': 0},
    'Fibrosis': {'present': 1, 'absent': 0},
    'Diabetes': {'present': 1, 'absent': 0},
    'Obesity': {'present': 1, 'absent': 0},
    'Steatosis (Fatty Liver)': {'present': 1, 'absent': 0},
    'Cirrhosis Status': {'absent': 0, 'compensate': 1, 'decompensate': 2},
    'Hyper-bilirubinaemia': {'present': 1, 'absent': 0},
    'Triglycerides': {'a1_0': 0, 'a3_2': 1, 'a17_4': 2},  # higher dyslipidemia worse
    'Recurrent Hepatitis': {'present': 1, 'absent': 0},
    'Fatigue': {'present': 1, 'absent': 0},
    'Bilirubin': {'a1_0': 0, 'a6_2': 1, 'a19_7': 2, 'a88_20': 3},  # higher bilirubin worse
    'Pruritus (Itching)': {'present': 1, 'absent': 0},
    'Upper Abdominal Pain': {'present': 1, 'absent': 0},
    'Fat-related Finding': {'present': 1, 'absent': 0},
    'Pain in Right Upper Quadrant': {'present': 1, 'absent': 0},
    'Pressure in Right Upper Quadrant': {'present': 1, 'absent': 0},
    'Alkaline Phosphatase': {'a239_0': 0, 'a699_240': 1, 'a4000_700': 2},  # elevated suggests cholestasis
    'Skin Symptoms': {'present': 1, 'absent': 0},
    'Anti-mitochondrial Antibodies': {'present': 1, 'absent': 0},
    'LE Cells (autoimmune sign)': {'present': 1, 'absent': 0},
    'Joint Symptoms': {'present': 1, 'absent': 0},
    'General Pain': {'present': 1, 'absent': 0},
    'Protein Level': {'a5_2': 1, 'a10_6': 0},  # low protein adverse
    'Edema': {'present': 1, 'absent': 0},
    'Platelet Count': {'a597_300': 0, 'a299_150': 1, 'a149_100': 2, 'a99_0': 3},  # thrombocytopenia worse
    # NOTE(review): the bin labels span 0-200, which looks more like a percentage
    # index than an INR ratio - confirm that "higher is worse" is the right direction.
    'INR (Coagulation abnormality)': {'a69_0': 0, 'a109_70': 1, 'a200_110': 2},  # prolonged INR worse
    'Bleeding': {'present': 1, 'absent': 0},
    'Flatulence': {'present': 1, 'absent': 0},
    # Key fixed: was 'Alcohol Use', which matches no renamed column.
    'Current Alcohol Use': {'present': 1, 'absent': 0},
    'Encephalopathy': {'present': 1, 'absent': 0},
    'Urea': {'a39_0': 0, 'a49_40': 1, 'a165_50': 2},
    'Ascites': {'present': 1, 'absent': 0},
    'Hepatomegaly': {'present': 1, 'absent': 0},
    'Hepatalgia (Liver Pain)': {'present': 1, 'absent': 0},
    'Abnormal Liver Density': {'present': 1, 'absent': 0},
    'ESR (Inflammation marker)': {'a14_0': 0, 'a49_15': 1, 'a200_50': 2},
    'ALT (Liver enzyme)': {'a34_0': 0, 'a99_35': 1, 'a199_100': 2, 'a850_200': 3},
    'AST (Liver enzyme)': {'a39_0': 0, 'a149_40': 1, 'a399_150': 2, 'a700_400': 3},
    'Amylase (Pancreatic/liver enzyme)': {'a299_0': 0, 'a499_300': 1, 'a1400_500': 2},
    'GGT (Liver enzyme)': {'a9_0': 0, 'a29_10': 1, 'a69_30': 2, 'a640_70': 3},
    'Cholesterol Level': {'a349_240': 0, 'a239_0': 1, 'a999_350': 1},  # low or high can be abnormal
    'Hepatitis B Surface Antigen': {'present': 1, 'absent': 0},
    # NOTE(review): both states map to 0, so the encoded column is constant and
    # carries no information downstream - confirm this is intended.
    'Hepatitis B Surface Antibody (anti-HBs)': {'present': 0, 'absent': 0},  # protective / neutral (contextual)
    'Anorexia': {'present': 1, 'absent': 0},
    'Nausea': {'present': 1, 'absent': 0},
    'Splenomegaly': {'present': 1, 'absent': 0},
    'Altered Consciousness': {'present': 1, 'absent': 0},
    'Spider Angioma': {'present': 1, 'absent': 0},
    'Jaundice': {'present': 1, 'absent': 0},
    'Albumin': {'a70_50': 0, 'a49_30': 1, 'a29_0': 2},  # low albumin worse
    'Liver Edge Abnormality': {'present': 1, 'absent': 0},
    'Irregular Liver Surface': {'present': 1, 'absent': 0},
    'Hepatitis B Core Antibody': {'present': 1, 'absent': 0},
    'Hepatitis C Antibody': {'present': 1, 'absent': 0},
    'Palmar Erythema': {'present': 1, 'absent': 0},
    'Hepatitis B e Antigen': {'present': 1, 'absent': 0},
    'Carcinoma': {'present': 1, 'absent': 0},
}

# Single-paragraph, human-readable summary of the HEPAR II network.
# Built by implicit concatenation of adjacent literals; every fragment except
# the last ends with a space so the pieces join into normal prose.
dataset_description_hepar2 = (
    "HEPAR2 (also called HEPAR II) is a causal Bayesian network model "
    "developed for the diagnosis of liver disorders, "
    "combining expert knowledge with clinical data from a well-maintained "
    "hepatology patient database. "
    "The model encodes risk factors (e.g., alcoholism, viral and toxic exposures), "
    "underlying diseases (chronic hepatitis, primary biliary cirrhosis, cirrhosis, carcinoma), "
    "symptoms/findings (jaundice, ascites, spider angioma, encephalopathy), "
    "and laboratory test results (bilirubin, liver enzymes, coagulation, synthetic function). "
    "The structure and parameters were elicited from hepatology experts and patient records "
    "to support clinical diagnosis and training. "
    "Severity is reflected via categorical/ordinal variable levels, "
    "and presence of pathology (e.g., active hepatitis, fibrosis, or abnormal labs) "
    "increases diagnostic risk. "
    "The provenance of the model is documented in Onisko’s Ph.D. dissertation "
    "and related publications on the HEPAR II system (Wasyluk et al. 2001; Onisko 2003)."
)

def fetch_hepar2(
    data_path='/net/dali/home/mscbio/rul98/CausalLLM/data/hepar2_20000.csv',
    bif_path='/net/dali/home/mscbio/rul98/CausalLLM/data/hepar2.bif',
    layout_seed=None,
):
    """Load the HEPAR II samples and the ground-truth causal graph.

    The CSV columns and the BIF network nodes are both renamed with
    ``rename_mapping_hepar2`` so the data and the graph share the same labels.
    Categorical states are then encoded as integers using
    ``value_mappings_hepar2``.

    Args:
        data_path: Path to the sampled CSV. Defaults to the original
            hard-coded cluster location.
        bif_path: Path to the BIF network definition. Defaults to the
            original hard-coded cluster location.
        layout_seed: Optional seed forwarded to ``nx.spring_layout`` so the
            node positions are reproducible. ``None`` keeps the previous
            non-deterministic layout.

    Returns:
        A ``(df, GroundTruth, pos_data)`` tuple: the encoded DataFrame, the
        relabelled ``networkx.DiGraph`` and the spring-layout positions.

    Warns:
        UserWarning: if ``value_mappings_hepar2`` has keys that match no
            column after renaming. Those columns are left unencoded.
    """
    import warnings

    df = pd.read_csv(data_path)
    df = df.rename(columns=rename_mapping_hepar2)

    # A mapping key that matches no column almost always means the key drifted
    # from rename_mapping_hepar2; report it rather than skipping it silently.
    unmatched = [col for col in value_mappings_hepar2 if col not in df.columns]
    if unmatched:
        warnings.warn(
            f"value_mappings_hepar2 keys not found in data columns "
            f"(left unencoded): {unmatched}",
            stacklevel=2,
        )

    # Apply value mappings. States missing from a column's mapping become <NA>
    # (Series.map yields NaN for them; the nullable Int64 dtype keeps them).
    for col, mapping in value_mappings_hepar2.items():
        if col in df.columns:
            df[col] = df[col].astype("str").map(mapping).astype("Int64")

    reader = BIFReader(bif_path)
    G_model = reader.get_model()

    # Rebuild the network as a plain DiGraph; nodes are added explicitly so
    # nodes without edges are not dropped.
    GroundTruth = nx.DiGraph()
    GroundTruth.add_nodes_from(G_model.nodes())
    GroundTruth.add_edges_from(G_model.edges())
    GroundTruth = nx.relabel_nodes(GroundTruth, rename_mapping_hepar2)
    pos_data = nx.spring_layout(GroundTruth, seed=layout_seed)
    return df, GroundTruth, pos_data