import csv
from collections import defaultdict

def read_csv(file_path):
    """Load the tree-statistics CSV and index its rows by dataset and method.

    The ``Tree`` column of each row is split twice: the text before its last
    underscore is used as the dataset name, and the text after its last
    hyphen is used as the pruning method. For example, a value such as
    ``iris_data-unpru`` yields dataset ``iris`` and method ``unpru``.

    Args:
        file_path: Path of a CSV file whose header row contains a ``Tree``
            column.

    Returns:
        A nested ``defaultdict`` mapping ``dataset -> pruning method -> row``,
        where each row is the mapping produced by ``csv.DictReader``. When
        two rows share the same dataset and method, the later row wins.

    Raises:
        KeyError: If the CSV header has no ``Tree`` column.
    """
    data = defaultdict(lambda: defaultdict(dict))
    # newline='' hands newline handling to the csv module, as its
    # documentation requires; without it, line breaks inside quoted fields
    # are rewritten by the text layer before the parser sees them.
    with open(file_path, 'r', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            tree = row['Tree']
            dataset = tree.rsplit('_', 1)[0]
            pruning_method = tree.split('-')[-1]
            data[dataset][pruning_method] = row
    return data

def generate_latex_table(data):
    """Render the per-dataset tree statistics as a compact LaTeX table.

    Every table row belongs to one dataset. Each numeric cell has the form
    ``unpruned / replacement``: the value taken from the ``'unpru'`` row
    followed by the value taken from the ``'repl'`` row of that dataset.
    Datasets are listed in case-insensitive alphabetical order and
    underscores in their names are escaped for LaTeX.

    Args:
        data: Mapping ``dataset -> pruning method -> CSV row``. Every dataset
            needs an ``'unpru'`` and a ``'repl'`` row, each providing the
            columns ``'Tree Size'``, ``'Num Dimensions'``,
            ``'Max Distinct Thresholds'`` and ``'Classification Errors'``.

    Returns:
        The complete ``table*`` environment as a single string whose lines
        are joined by newlines (no trailing newline).

    Raises:
        KeyError: If a dataset lacks one of the two pruning methods, or a
            row lacks one of the required columns.
    """
    # Pruning methods shown in each cell, in display order.
    methods_shown = ('unpru', 'repl')
    # CSV columns shown after the dataset name, in the order of the header.
    columns = (
        'Tree Size',
        'Num Dimensions',
        'Max Distinct Thresholds',
        'Classification Errors',
    )
    # One left-aligned name column plus one right-aligned column per
    # statistic, so the spec cannot drift from the number of cells per row.
    column_spec = "l" + "r" * len(columns)

    latex_content = [
        "% generated by convert-tree-stats-to-latex.py",
        "\\begin{table*}[!ht]",
        "\\centering",
        "\\caption{Decision trees used in our experiments: The first entry is for the unpruned tree $T$, the second for the tree $T$ computed by the replacement heuristic.}",
        "\\label{tab:tree-analysis-compact}",
        "\\small",
        "\\begin{tabular}{" + column_spec + "}",
        "\\toprule",
        "Dataset & Size $s$ & \\# Features $d$ used in $T$ & Domain $D$ used in $T$ & Errors \\\\",
        "\\midrule",
    ]

    for dataset in sorted(data, key=str.lower):
        methods = data[dataset]
        # Check membership first: indexing a defaultdict would silently
        # insert an empty row and fail later with a misleading column error.
        for method in methods_shown:
            if method not in methods:
                raise KeyError(
                    f"dataset {dataset!r} has no row for pruning method {method!r}"
                )

        cells = [dataset.replace('_', '\\_')]
        for column in columns:
            cells.append(
                " / ".join(f"{methods[method][column]}" for method in methods_shown)
            )
        latex_content.append(" & ".join(cells) + " \\\\")

    latex_content.extend([
        "\\bottomrule",
        "\\end{tabular}",
        "\\end{table*}",
    ])

    return '\n'.join(latex_content)

def main():
    """Read the tree-analysis CSV and write the compact LaTeX table next to it."""
    source_csv = 'results/tree_analysis_results.csv'
    target_tex = 'results/tree_analysis_results_table_compact.tex'

    # Parse the statistics and render them in one step.
    table_text = generate_latex_table(read_csv(source_csv))

    with open(target_tex, 'w') as out:
        out.write(table_text)

    print(f"Compact LaTeX table has been generated and saved to {target_tex}")

# Run the conversion only when this file is executed as a script, so that
# importing it does not read or write any files.
if __name__ == "__main__":
    main()
