import pandas as pd

# Default CSV location used by load_data() when no path is supplied.
CSV_FILE_PATH = "fda_ai_records.csv"


def load_data(file_path=CSV_FILE_PATH):
    """Read ``file_path`` as CSV and return the resulting DataFrame.

    A status line is printed either way. On any failure (missing file or
    any other exception raised while reading) the problem is reported on
    stdout and None is returned instead of raising.
    """
    try:
        frame = pd.read_csv(file_path)
        print(f"Successfully loaded data from {file_path}")
    except FileNotFoundError:
        # Reported separately so the user gets a clear "wrong path" message.
        print(f"Error: The file {file_path} was not found.")
        frame = None
    except Exception as exc:
        print(f"An error occurred while loading the CSV file: {exc}")
        frame = None
    return frame

def display_head(df, n=5):
    """Print a titled preview of the leading ``n`` rows of ``df``.

    When ``df`` is None, "DataFrame not loaded." is printed instead.
    """
    if df is None:
        print("DataFrame not loaded.")
        return

    print(f"\n--- First {n} Rows ---")
    preview = df.head(n)
    print(preview)

def display_info(df):
    """Print a titled ``DataFrame.info()`` summary for ``df``.

    When ``df`` is None, "DataFrame not loaded." is printed instead.
    """
    if df is None:
        print("DataFrame not loaded.")
        return

    print("\n--- DataFrame Info ---")
    # info() writes its report to stdout itself, so no print() wrapper here.
    df.info()

def display_description(df):
    """Print titled descriptive statistics covering every column of ``df``.

    When ``df`` is None, "DataFrame not loaded." is printed instead.
    """
    if df is None:
        print("DataFrame not loaded.")
        return

    print("\n--- DataFrame Description ---")
    # include='all' so non-numeric columns are summarised as well.
    stats = df.describe(include='all')
    print(stats)

def get_record_by_submission_id(df, submission_id):
    """Find and print every record whose Submission Number matches.

    Args:
        df: DataFrame to search, or None when loading failed.
        submission_id: Value compared for equality against the
            'submission_number' column.

    Returns:
        None. Matching rows, or a diagnostic message, are printed to stdout.
    """
    if df is None:
        print("DataFrame not loaded.")
        return

    # Report a missing column the same way count_unique_submission_ids does,
    # rather than letting the lookup below fail with a KeyError.
    if 'submission_number' not in df.columns:
        print("Error: 'submission_number' column not found.")
        return

    print(f"\n--- Record for Submission Number: {submission_id} ---")
    record = df[df['submission_number'] == submission_id]
    if record.empty:
        print(f"No record found with Submission Number: {submission_id}")
    else:
        # to_string() prints the full record even when the frame is wide.
        print(record.to_string())

def list_columns(df):
    """Print the column names of ``df`` as a plain Python list.

    When ``df`` is None, "DataFrame not loaded." is printed instead.
    """
    if df is None:
        print("DataFrame not loaded.")
        return

    print("\n--- Column Names ---")
    names = df.columns.tolist()
    print(names)

def count_total_records(df):
    """Print how many rows ``df`` contains.

    When ``df`` is None, "DataFrame not loaded." is printed instead.
    """
    if df is None:
        print("DataFrame not loaded.")
        return

    row_count = len(df)
    print("\n--- Total Records ---")
    print(f"Total number of records: {row_count}")

def count_unique_submission_ids(df):
    """Print the number of distinct values in the 'submission_number' column.

    Prints "DataFrame not loaded." when ``df`` is None, and an error line
    when the 'submission_number' column is absent.
    """
    if df is None:
        print("DataFrame not loaded.")
        return

    if 'submission_number' not in df.columns:
        print("Error: 'submission_number' column not found.")
        return

    distinct_count = df['submission_number'].nunique()
    print("\n--- Unique Submission Numbers ---")
    print(f"Number of unique Submission Numbers: {distinct_count}")

def count_records_without_thesis(df):
    """Count records lacking a thesis statement, and how many of those have a PDF link.

    A thesis counts as missing when the 'thesis' value is null or an empty
    string. A record counts as having a PDF when 'summary_pdf_link' is
    non-null.

    Args:
        df: DataFrame to inspect, or None when loading failed.

    Returns:
        None. The two counts, or a diagnostic message, are printed to stdout.
    """
    if df is None:
        print("DataFrame not loaded.")
        return

    # Report missing columns explicitly instead of failing with a KeyError,
    # matching the style used by the other column-dependent helpers.
    missing_columns = [
        column for column in ('thesis', 'summary_pdf_link')
        if column not in df.columns
    ]
    if missing_columns:
        for column in missing_columns:
            print(f"Error: '{column}' column not found.")
        return

    no_thesis = df['thesis'].isnull() | (df['thesis'] == '')
    count_no_thesis = no_thesis.sum()
    # Single .loc selection instead of chained df[mask][col] indexing.
    count_no_thesis_with_pdf = df.loc[no_thesis, 'summary_pdf_link'].notnull().sum()

    print("\n--- Records Without Thesis Statement ---")
    print(f"Number of records without a thesis statement: {count_no_thesis}")
    print(f"Number of those records with a PDF file: {count_no_thesis_with_pdf}")

def display_top_gemini_cost(df, n=5):
    """Display the top n records by highest gemini_cost_summary.

    The cost column is coerced to numeric for ranking (unparseable values
    become NaN and sort last). The caller's DataFrame is not modified.

    Args:
        df: DataFrame to inspect, or None when loading failed.
        n: Number of rows to show.

    Returns:
        None. The table, or a diagnostic message, is printed to stdout.
    """
    if df is None:
        print("DataFrame not loaded.")
        return

    if 'gemini_cost_summary' not in df.columns:
        print("Error: 'gemini_cost_summary' column not found.")
        return

    display_columns = ['submission_number', 'device_model', 'company', 'gemini_cost_summary']
    # Name any absent display column directly; otherwise the selection below
    # would fail and be misreported as a cost-processing error.
    missing_columns = [column for column in display_columns if column not in df.columns]
    if missing_columns:
        for column in missing_columns:
            print(f"Error: '{column}' column not found.")
        return

    try:
        # Convert the column to numeric in case it's stored as string
        costs = pd.to_numeric(df['gemini_cost_summary'], errors='coerce')
        # Work on the four displayed columns only; assign() returns a new
        # frame, so neither a full copy nor mutation of df is needed.
        top_n = (
            df[display_columns]
            .assign(gemini_cost_summary=costs)
            .sort_values(by='gemini_cost_summary', ascending=False)
            .head(n)
        )
    except (TypeError, ValueError) as e:
        print(f"Error processing gemini_cost_summary: {e}")
        return

    print(f"\n--- Top {n} Records by Gemini Cost Summary ---")
    print(top_n.to_string(index=False))


if __name__ == "__main__":
    # Script entry point: load the CSV and run each viewer once.
    # Names are kept at module level so they remain available when the
    # script is run with `python -i`.
    fda_data = load_data()

    if fda_data is not None:
        # Display some basic views
        list_columns(fda_data)
        display_head(fda_data, n=10)
        display_info(fda_data)
        # display_description(fda_data) # Can be verbose, uncomment if needed

        # Display counts
        count_total_records(fda_data)
        count_unique_submission_ids(fda_data)
        count_records_without_thesis(fda_data)

        display_top_gemini_cost(fda_data, n=5)

        # Example of fetching a specific record.
        # Change example_submission_id to an ID present in your CSV if needed;
        # the checks and messages below all follow this one variable.
        example_submission_id = 'K192004'
        has_id_column = 'submission_number' in fda_data.columns

        if has_id_column and example_submission_id in fda_data['submission_number'].values:
            get_record_by_submission_id(fda_data, example_submission_id)
        elif has_id_column and not fda_data.empty:
            # Example ID is absent: fall back to the first ID in the file.
            first_id = fda_data['submission_number'].iloc[0]
            print(f"\nNote: {example_submission_id} not found, trying with first ID: {first_id}")
            get_record_by_submission_id(fda_data, first_id)
        elif not has_id_column:
            print("\nNote: 'submission_number' column missing, cannot fetch example record.")
        else:
            print("\nNote: DataFrame is empty, cannot fetch example record.")

        print("\n--- End of Data Viewer Script ---")
        print(f"To use these functions interactively, you can run python -i {__file__}")
