import pandas as pd
import re
import os
import re
import sys
from pathlib import Path


import pandas as pd 
import wandb
from tqdm import tqdm

# Authenticate with Weights & Biases. The API key is read from the
# WANDB_API_KEY environment variable instead of being committed to the source
# file. When the variable is unset, key=None is passed and wandb is left to
# resolve credentials itself (see the wandb.login documentation).
# NOTE(review): the key that used to be hard-coded on this line was exposed in
# source control and should be revoked and rotated.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

def get_df_from_wandb():
    """Fetch run metadata from the W&B project and return it as a DataFrame.

    Iterates over every run of the
    ``reznov3395-simon-fraser-university/normal_train`` project and records,
    per run, its name, its state and the ``benchmark`` entry of its config.

    Returns:
        pandas.DataFrame with the columns ``Name``, ``State`` and
        ``Benchmarks`` (one row per run). ``Benchmarks`` is ``None`` for a run
        whose config has no ``benchmark`` key.
    """
    api = wandb.Api()

    # Project is specified by <entity/project-name>
    runs = api.runs("reznov3395-simon-fraser-university/normal_train")

    name_list, state_list, benchmarks = [], [], []
    for wrun in tqdm(runs, desc='going through runs'):
        # .name is the human-readable name of the run.
        name_list.append(wrun.name)
        state_list.append(wrun.state)
        # .get() keeps a single run without a 'benchmark' entry from aborting
        # the whole scan; such rows simply never match a task filter later on.
        benchmarks.append(wrun.config.get('benchmark'))

    return pd.DataFrame({
        "Name": name_list,
        'State': state_list,
        'Benchmarks': benchmarks,
    })


def create_combined_latex_report(tasks, latex_dir, output_path, all_latex_content = None):
    """
    Combine multiple LaTeX tables into a single comprehensive report.

    Args:
        tasks: List of task names; each one becomes a subsection, in order.
        latex_dir: Directory containing individual LaTeX table files named
            ``missing_runs_normal_table_<task>.tex``. Only read when
            ``all_latex_content`` is None.
        output_path: Path where the combined LaTeX document should be saved.
            Missing parent directories are created.
        all_latex_content: Optional mapping of task name -> LaTeX table
            source. When given, tables are taken from it instead of being
            read from ``latex_dir``.

    Returns:
        The list of document fragments that was joined with newlines and
        written to ``output_path``.
    """
    # Document preamble, title and the colour legend shown above the tables.
    document_content = [
        "\\documentclass{article}",
        "\\usepackage{graphicx} % Required for inserting images",
        "\\usepackage{xcolor}",
        "\\usepackage{tabularx}",
        "\\usepackage{makecell}",
        "\\usepackage{float}   % in the preamble",
        "\\setcellgapes{3pt}\\makegapedcells   % optional: nicer spacing",
        "\\title{Nips Plots}",
        "\\author{kka151}",
        "\\date{April 2025}",
        "\\begin{document}",
        "\\maketitle",
        "\\section{Normal Trains}",
        (
            "\\textcolor{green}{Green}: Complete, "
            "\\textcolor{blue}{Blue}: Running, "
            "\\textcolor{orange}{Orange}: Failed, "
            "\\textcolor{black}{Black}: Missing, "
            "\\textcolor{red}{Red}: Not exist"
        ),
    ]

    # Add each task's table as a subsection
    for task in tasks:
        # Create a nicer formatted name for the subsection
        task_name = task.replace('_', ' ').title()
        document_content.append(f"\\subsection{{{task_name}}}")

        if all_latex_content is None:
            # Read the LaTeX table from the per-task file
            latex_file_path = Path(latex_dir) / f"missing_runs_normal_table_{task}.tex"
            try:
                latex_content = latex_file_path.read_text(encoding='utf-8')
            except FileNotFoundError:
                latex_content = None
        else:
            latex_content = all_latex_content.get(task)

        # A task without a table gets the same placeholder whether the table
        # was expected on disk or in the in-memory mapping.
        if latex_content is None:
            latex_content = f"\\textit{{Table for {task_name} not found.}}"

        document_content.append(latex_content)
        document_content.append("")  # Empty line for readability

    # Close the document
    document_content.append("\\end{document}")

    # Write the complete document to file, creating its directory if needed
    report_file = Path(output_path)
    report_file.parent.mkdir(parents=True, exist_ok=True)
    report_file.write_text('\n'.join(document_content), encoding='utf-8')

    print(f"Combined LaTeX report saved to {output_path}")
    return document_content

# # Example usage after running your code:
# latex_dir = Path(__file__).parent / "results" / "missing_normal"
# output_path = Path(__file__).parent / "results" / "combined_runs_report.tex"

# create_combined_latex_report(tasks, latex_dir, output_path)



# --- pattern: one or more "(opt, val)" groups separated by commas -------------
# Matches a run of parenthesised groups such as "(adam, 0.001), (sgd, 0.01)".
# The whole run is captured in the named group "list", which is what
# makecell_replacer() reads. Groups cannot nest: "[^()]" excludes both
# parenthesis characters from a group's content. Compiled with re.VERBOSE, so
# the whitespace and "#" comments inside the pattern are ignored by the engine.
TUPLE_LIST = re.compile(
    r"""
    (?P<list>
        \([^()]*?\)               # one "(…)" group, non-greedy
        (?:,\s*\([^()]*?\))*      # followed by zero or more ", (…)…" groups
    )
    """,
    re.VERBOSE,
)


def makecell_replacer(match: re.Match) -> str:
    """Turn a matched run of "(a, b), (c, d)" groups into a LaTeX makecell.

    Args:
        match: A regex match exposing the matched text in a group named
            ``list``.

    Returns:
        The matched text unchanged when it contains the word "missing";
        otherwise a left-aligned makecell whose lines are the individual
        groups with their surrounding parentheses and spaces stripped.
    """
    matched_text = match.group("list")

    if "missing" not in matched_text:
        # Break the run apart at each "),", then trim parens/spaces per entry.
        entries = []
        for chunk in matched_text.split("),"):
            entries.append(chunk.strip(" ()"))
        line_break = r"\\"  # LaTeX line break inside the cell
        return "\\makecell[l]{" + line_break.join(entries) + "}"

    return matched_text


def transform_table(tex: str) -> str:
    """Rewrite a LaTeX table so it uses tabularx and wrapped tuple cells.

    Args:
        tex: LaTeX source of the table.

    Returns:
        The source with the four-column ``|l|c|c|c|`` tabular opening replaced
        by a text-width tabularx opening, and every match of ``TUPLE_LIST``
        passed through ``makecell_replacer``.
    """
    tabular_opening = r"\\begin\{tabular\}\{\|l\|c\|c\|c\|\}"
    tabularx_opening = r"\\begin{tabularx}{\\textwidth}{|l|X|X|X|}"

    # First widen the environment, then wrap the "(opt, lr)" groups.
    widened = re.sub(tabular_opening, tabularx_opening, tex)
    return TUPLE_LIST.sub(makecell_replacer, widened)
# --- run-status report: run names are parsed from the "Name" column -----------
def _format_text_status(is_neurosync, missing, running, failed, n_expected):
    """Return the plain-text status of one (agent, seed) table cell."""
    if is_neurosync:
        # NeuroSync cells only track presence: `missing` is a string marker and
        # `running` / `failed` are booleans.
        if missing == "present":
            return "all present"
        if running:
            return "running"
        if failed:
            return "failed"
        return "not present"

    if not running and not failed:
        if not missing:  # All runs present
            return "all present"
        if len(missing) == n_expected:  # All runs missing
            return "not present"

    parts = [f"({opt}, {lr}) missing" for opt, lr in missing]
    parts += [f"({opt}, {lr}) running" for opt, lr in running]
    parts += [f"({opt}, {lr}) failed" for opt, lr in failed]
    return ", ".join(parts)


def _format_latex_status(is_neurosync, missing, running, failed, n_expected):
    """Return the colour-coded LaTeX status of one (agent, seed) table cell."""
    if is_neurosync:
        if missing == "present":
            return "\\textcolor{green}{complete}"
        if running:
            return "\\textcolor{blue}{running}"
        if failed:
            return "\\textcolor{orange}{failed}"
        return "\\textcolor{red}{missing}"

    if not running and not failed:
        if not missing:  # All runs present
            return "\\textcolor{green}{complete}"
        if len(missing) == n_expected:  # All runs missing
            return "\\textcolor{red}{missing}"

    # Missing pairs stay uncoloured (black); running is blue, failed is orange.
    parts = [f"{{({opt}, {lr})}}" for opt, lr in missing]
    parts += [f"\\textcolor{{blue}}{{({opt}, {lr})}}" for opt, lr in running]
    parts += [f"\\textcolor{{orange}}{{({opt}, {lr})}}" for opt, lr in failed]
    return ", ".join(parts)


def check_wandb_runs(csv_path=None, runs_df=None, desired_agents=None, output_path=None, latex_output_path=None, dont_consider_combination=False, task_name=''):
    """Report, per agent and seed, which runs are complete, running, failed or missing.

    Run names are parsed with a regex into agent, optimizer, learning rate and
    seed. For every desired agent and each of the seeds 0, 1 and 2 the function
    tracks which expected (optimizer, lr) pairs have no run yet and which pairs
    are running or failed. ``NeuroSyncAgent`` is special-cased: only the
    presence of a run per seed is tracked, not individual pairs.

    Args:
        csv_path: Optional path of a CSV file with ``Name`` and ``State``
            columns. Takes precedence over ``runs_df``; ``task_name`` is not
            used for filtering in this case.
        runs_df: Optional DataFrame with ``Name`` and ``State`` columns (and
            ``Benchmarks`` when ``task_name`` is non-empty).
        desired_agents: Agent names to report on. Names lacking the ``Agent``
            suffix get it appended. Defaults to every agent found in the data.
        output_path: If not None, the plain-text report is written there.
        latex_output_path: If not None, the LaTeX table is written there.
        dont_consider_combination: When True, a single running or completed
            run for an (agent, seed) clears every missing pair of that cell.
        task_name: Benchmark used to filter ``runs_df`` and to build the LaTeX
            caption and label.

    Returns:
        Tuple ``(report_text, latex_table, available_agents)``: the
        tab-separated text report, the LaTeX table after ``transform_table``,
        and the sorted list of agent names parsed from the run names.

    Raises:
        FileNotFoundError: ``csv_path`` is given but does not exist.
        ValueError: neither ``csv_path`` nor ``runs_df`` is given, or the data
            lacks the ``Name`` or ``State`` column.
    """
    seeds = (0, 1, 2)
    # (optimizer, lr) pairs every non-NeuroSync agent is expected to have per seed.
    expected_pairs = [("adam", 0.001), ("sgd", 0.01)]
    neurosync = "NeuroSyncAgent"

    # Either load the CSV file or use runs_df
    if csv_path is not None:
        if not os.path.exists(csv_path):
            raise FileNotFoundError(f"The file {csv_path} does not exist.")
        df = pd.read_csv(csv_path)
    elif runs_df is not None:
        # Only keep the rows with the given task name as benchmark
        if task_name != '':
            df = runs_df.loc[runs_df["Benchmarks"] == task_name].copy()
        else:
            df = runs_df
    else:
        raise ValueError("Either csv_path or runs_df must be provided")

    missing_columns = [col for col in ("Name", "State") if col not in df.columns]
    if missing_columns:
        raise ValueError(
            f"Run data must contain 'Name' and 'State' columns; missing: {missing_columns}"
        )

    # Extract agent information from the "Name" column
    pattern = re.compile(r'([A-Za-z0-9]+)Agent_[^_]+_([a-z]+)_([0-9.]+)_.*?_(\d+)')
    available_agents = set()
    runs_info = []

    for run_name, state in zip(df['Name'], df['State']):
        # Empty CSV cells come back as NaN (a float), which re cannot search.
        if not isinstance(run_name, str):
            continue
        match = pattern.search(run_name)
        if not match:
            continue

        baseline, optimizer, lr, seed = match.groups()
        try:
            lr_value = float(lr)
        except ValueError:
            # "[0-9.]+" also matches strings such as "1.2.3"; skip those runs.
            continue

        agent_name = f"{baseline}Agent"
        available_agents.add(agent_name)
        runs_info.append({
            'agent': agent_name,
            'optimizer': optimizer,
            'lr': lr_value,
            'seed': int(seed),
            'state': state
        })

    # Print some debug info
    print(f"Found {len(available_agents)} unique agents")
    print(f"Found {len(runs_info)} total runs")
    available_agents = sorted(available_agents)
    print(f"Available agents: {available_agents}")

    # If desired_agents is not provided, use all available agents
    if desired_agents is None:
        desired_agents = available_agents
    else:
        # Ensure all desired agents end with "Agent"
        desired_agents = [agent if agent.endswith("Agent") else f"{agent}Agent" for agent in desired_agents]

    # Per agent and seed: pairs still missing, pairs running, pairs failed.
    # Every cell starts out as "everything missing, nothing running or failed".
    run_status, running_runs, failed_runs = {}, {}, {}
    for agent in desired_agents:
        if agent == neurosync:
            run_status[agent] = {seed: "not present" for seed in seeds}
            running_runs[agent] = {seed: False for seed in seeds}
            failed_runs[agent] = {seed: False for seed in seeds}
        else:
            run_status[agent] = {seed: list(expected_pairs) for seed in seeds}
            running_runs[agent] = {seed: [] for seed in seeds}
            failed_runs[agent] = {seed: [] for seed in seeds}

    # Mark runs by their status
    for run in runs_info:
        agent = run['agent']
        seed = run['seed']
        # Runs of untracked agents or seeds have no cell in the table.
        if agent not in run_status or seed not in run_status[agent]:
            continue

        pair = (run['optimizer'], run['lr'])
        is_running = run['state'] == 'running'
        is_failed = run['state'] in ('failed', 'killed')
        # NOTE(review): every other state (including e.g. 'crashed') is treated
        # as completed, as before - confirm that this is intended.

        if agent == neurosync:
            if is_running:
                running_runs[agent][seed] = True
            elif is_failed:
                failed_runs[agent][seed] = True
            else:
                run_status[agent][seed] = "present"
            continue

        if is_running:
            if pair not in running_runs[agent][seed]:
                running_runs[agent][seed].append(pair)
        elif is_failed:
            if pair not in failed_runs[agent][seed]:
                failed_runs[agent][seed].append(pair)

        # Whatever its state, the run exists, so the pair is no longer missing.
        if pair in run_status[agent][seed]:
            run_status[agent][seed].remove(pair)
        # Only one config was wanted: a running or completed run settles the cell.
        if dont_consider_combination and not is_failed:
            run_status[agent][seed] = []

    # Build the text report and the LaTeX table side by side
    report_lines = ["Baseline\tSeed 0\tSeed 1\tSeed 2"]
    latex_lines = [
        "\\begin{table}[H]",
        "\\centering",
        "\\begin{tabular}{|l|c|c|c|}",
        "\\hline",
        "Baseline & Seed 0 & Seed 1 & Seed 2 \\\\",
        "\\hline"
    ]

    for agent in desired_agents:
        # Extract the baseline name (remove 'Agent' suffix)
        baseline = agent[:-5] if agent.endswith("Agent") else agent
        is_neurosync = agent == neurosync

        text_cells, latex_cells = [], []
        for seed in seeds:
            cell = (
                is_neurosync,
                run_status[agent][seed],
                running_runs[agent][seed],
                failed_runs[agent][seed],
                len(expected_pairs),
            )
            text_cells.append(_format_text_status(*cell))
            latex_cells.append(_format_latex_status(*cell))

        report_lines.append("\t".join([baseline, *text_cells]))
        latex_lines.append(f"{baseline} & " + " & ".join(latex_cells) + " \\\\")

    task_name_report = task_name.replace('_', '-')
    latex_lines.extend([
        "\\hline",
        # transform_table() below rewrites the opening tag to tabularx, which
        # is why the closing tag is already tabularx here.
        "\\end{tabularx}",
        f"\\caption{{Runs status by baseline and seed for {task_name_report}}}",
        f"\\label{{tab:runs_status_{task_name_report}}}",
        "\\end{table}"
    ])

    report_text = "\n".join(report_lines)
    latex_table = transform_table("\n".join(latex_lines))

    # Save to files
    if output_path is not None:
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(report_text)
        print(f"Text report saved to {output_path}")

    if latex_output_path is not None:
        Path(latex_output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(latex_output_path, 'w', encoding='utf-8') as f:
            f.write(latex_table)
        print(f"LaTeX table saved to {latex_output_path}")

    return report_text, latex_table, available_agents


# Usage example:
if __name__ == "__main__":
    # Benchmarks to report on; each one becomes a subsection of the combined report.
    tasks = ['random_label_cifar10', 'random_MNIST', 'shuffle_cifar10', 'permuted_MNIST', 'continual_cifar100', 'continual_imagenet', 'new_continual_imagenet']
    # Benchmarks for which one running/completed run per (agent, seed) is enough.
    single_run_tasks = ['permuted_MNIST', 'continual_cifar100', 'continual_imagenet', 'new_continual_imagenet']

    available_agents = ['BaseAgent', 'CBPAgent', 'EWCAgent', 'L2Agent', 'L2InitAgent', 'L2InitPlusEWCAgent', 'LayerNormAgent', 'NeuroSyncAgent', 'ReDoAgent', 'CReLUAgent', 'DeepFourierAgent', 'PReLUAgent', 'SPAgent']
    runs_df = get_df_from_wandb()

    # Dictionary to store LaTeX content for each task
    all_latex_content = {}
    for task in tasks:
        # output_path / latex_output_path stay None: the per-task tables are
        # only kept in memory and written once, as part of the combined report.
        report, latex, agents = check_wandb_runs(
            runs_df=runs_df,
            output_path=None,
            latex_output_path=None,
            desired_agents=available_agents,
            dont_consider_combination=task in single_run_tasks,
            task_name=task,
        )
        all_latex_content[task] = latex
        print("Available agents:", agents)
        print("\nRun Status Summary:")
        print(report)
        print("#####################################")

    # Create the combined LaTeX report
    results_dir = Path(__file__).parent / "results"
    # The results directory is not guaranteed to exist next to the script yet.
    results_dir.mkdir(parents=True, exist_ok=True)
    latex_dir = results_dir / "missing_normal"
    output_path = results_dir / "combined_runs_report_normal.tex"

    create_combined_latex_report(tasks, latex_dir, output_path, all_latex_content=all_latex_content)


