import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re

def extract_in(inout: str):
    """Return the integer that follows the first ``in`` in *inout*.

    The argument is converted with ``str()`` before searching, so
    non-string values are accepted. Returns ``None`` when no ``in<digits>``
    sequence is present.
    """
    found = re.search(r"in(\d+)", str(inout))
    if found is None:
        return None
    return int(found.group(1))

def extract_and(subdir: str):
    """Return the integer that follows the first ``and`` in *subdir*.

    The argument is converted with ``str()`` before searching, so
    non-string values are accepted. Returns ``None`` when no
    ``and<digits>`` sequence is present.
    """
    text = str(subdir)
    hit = re.search(r"and(\d+)", text)
    if not hit:
        return None
    digits = hit.group(1)
    return int(digits)

def plot_bar(ax, df_subdirs, in_values, title):
    """Draw grouped ``unique_ratio`` bars on *ax*, one group per "in" value.

    Args:
        ax: Axes-like object to draw on (uses ``bar``, ``text`` and the
            ``set_*``/``grid`` methods).
        df_subdirs: DataFrame with ``inout``, ``subdir`` and
            ``unique_ratio`` columns. It is not modified.
        in_values: Sequence of "in" values; group ``i`` is centred on
            x position ``i`` and labelled with ``in_values[i]``.
        title: Title placed above the axes.

    Rows whose ``inout`` / ``subdir`` do not contain ``in<digits>`` /
    ``and<digits>`` are ignored.
    """
    # Work on a copy so the caller's frame does not gain "in"/"and" columns.
    data = df_subdirs.copy()
    data["in"] = data["inout"].apply(extract_in)
    data["and"] = data["subdir"].apply(extract_and)
    data = data.dropna(subset=["in", "and"])
    # A single unparseable row turns the columns into floats (None -> NaN);
    # cast back so the bar labels read "3" rather than "3.0".
    data = data.astype({"in": int, "and": int})
    data = data.sort_values(["in", "and"])

    bar_width = 0.12
    # Labels are drawn in data coordinates just below the lower y-limit (0.4)
    # so they appear underneath the bars, between the axis and the x label.
    and_ypos = 0.36
    in_positions = np.arange(len(in_values))

    for i, in_val in enumerate(in_values):
        # Already ordered by "and" thanks to the sort above.
        group = data[data["in"] == in_val]
        count = len(group)
        if count == 0:
            continue

        ands = group["and"].tolist()
        ys = group["unique_ratio"].tolist()
        colors = plt.cm.Blues(np.linspace(0.4, 0.9, count))

        # ax.bar treats x as the bar *centre*, so spread the centres
        # symmetrically around i to keep the whole group over its tick.
        xs = (i + (np.arange(count) - (count - 1) / 2) * bar_width).tolist()
        ax.bar(xs, ys, width=bar_width, color=colors, edgecolor=None)

        # Annotate each bar with its "and" value.
        for xv, av in zip(xs, ands):
            ax.text(xv, and_ypos, str(av), ha="center", va="top",
                    fontsize=9, fontweight='bold', rotation=45)

    ax.set_xticks(in_positions)
    ax.set_xticklabels([str(v) for v in in_values], fontsize=11, fontweight='bold')
    ax.set_xlabel("in", fontsize=13, fontweight='bold', labelpad=20)
    ax.set_ylabel("unique_ratio", fontsize=13, fontweight='bold', labelpad=10)
    ax.set_ylim(0.4, 1.0)
    ax.set_title(title, fontsize=13, fontweight='bold')
    ax.grid(axis="y", linestyle="--", alpha=0.6)


def visualize_summary_dual(excel_AN: str, excel_ANO: str, out_pdf="AN_vs_ANO_unique_ratio.pdf",
                           *, in_values=None):
    """Plot the AN and ANO ``unique_ratio`` summaries side by side and save them.

    Args:
        excel_AN: Path of the workbook whose ``subdirs`` sheet feeds the
            left panel.
        excel_ANO: Path of the workbook whose ``subdirs`` sheet feeds the
            right panel.
        out_pdf: Output file path handed to ``savefig``.
        in_values: "in" values to show as bar groups, in order. Defaults
            to ``[5, 10, 20, 40, 80]``.
    """
    if in_values is None:
        in_values = [5, 10, 20, 40, 80]
    else:
        # plot_bar needs len() and repeated iteration.
        in_values = list(in_values)

    # read data
    df_AN = pd.read_excel(excel_AN, sheet_name="subdirs")
    df_ANO = pd.read_excel(excel_ANO, sheet_name="subdirs")

    fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
    try:
        plot_bar(axes[0], df_AN, in_values, "AN: unique_ratio by in and and-size")
        plot_bar(axes[1], df_ANO, in_values, "ANO: unique_ratio by in and and-size")

        # Operate on this figure explicitly instead of pyplot's "current" one.
        fig.tight_layout()
        fig.savefig(out_pdf, dpi=300, bbox_inches="tight")
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)
    print(f"Saved: {out_pdf}")


if __name__ == "__main__":
    # Script entry point: compare the two default summary workbooks.
    visualize_summary_dual(
        excel_AN="summary_AN.xlsx",
        excel_ANO="summary_ANO.xlsx",
    )
