# Required libraries
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the two evaluation runs being compared (with / without binomial).
# The encoding is pinned to UTF-8 so that decoding the JSON text does not
# depend on the platform's locale default.
with open(
    "/mnt/data/language_eval_results_2025-04-21_19-27-36_with_binomial.json",
    encoding="utf-8",
) as f:
    data_with = json.load(f)

with open(
    "/mnt/data/language_eval_results_2025-04-21_19-38-38_no_binomial.json",
    encoding="utf-8",
) as f:
    data_without = json.load(f)

# Languages considered
langs = ["en", "es", "ru", "uk", "hi", "te"]

# Build one wide record per language: every metric appears twice, once read
# from each run, so the two setups sit side by side in the same row.
# NOTE(review): the compression keys repeat the language code
# ("<lang>_<lang>_compression_*"); presumably this mirrors how the result
# files were written -- confirm against the producer of those files.
records = []
for lang in langs:
    records.append({
        "language": lang,
        "Bits per Character (With Binomial)": data_with[f"{lang}_eval_bpc"],
        "Bits per Character (No Binomial)": data_without[f"{lang}_eval_bpc"],
        "Eval Loss (With Binomial)": data_with[f"{lang}_eval_loss"],
        "Eval Loss (No Binomial)": data_without[f"{lang}_eval_loss"],
        "Compression Rate (With Binomial)": data_with[f"{lang}_{lang}_compression_rate"],
        "Compression Rate (No Binomial)": data_without[f"{lang}_{lang}_compression_rate"],
        "Compression Variance (With Binomial)": data_with[f"{lang}_{lang}_compression_var"],
        "Compression Variance (No Binomial)": data_without[f"{lang}_{lang}_compression_var"],
    })

# Wide table: one row per language, one column per (metric, setup) pair.
df_all = pd.DataFrame.from_records(records)

# Reshape one metric's columns to long form and normalize the setup labels
def melt_and_label(df, cols, value_name):
    """Return the columns *cols* of *df* in long form, keyed by language.

    The result has a "language" column, a "Setup" column and a column named
    *value_name* holding the melted values. "Setup" is derived from the
    source column name: names containing "With" become "With Binomial",
    every other name becomes "No Binomial".
    """

    def _setup_name(column):
        # Collapse the verbose source column name to its setup label.
        if "With" in column:
            return "With Binomial"
        return "No Binomial"

    long_df = df.melt(
        id_vars="language",
        value_vars=cols,
        var_name="Setup",
        value_name=value_name,
    )
    long_df["Setup"] = long_df["Setup"].apply(_setup_name)
    return long_df

# Long-form frame per metric: maps the value-column name to the pair of wide
# columns (with / without binomial) that feed it. Insertion order matters,
# because the results are unpacked positionally below.
_metric_columns = {
    "Bits per Character": [
        "Bits per Character (With Binomial)",
        "Bits per Character (No Binomial)",
    ],
    "Eval Loss": [
        "Eval Loss (With Binomial)",
        "Eval Loss (No Binomial)",
    ],
    "Compression Rate": [
        "Compression Rate (With Binomial)",
        "Compression Rate (No Binomial)",
    ],
    "Compression Variance": [
        "Compression Variance (With Binomial)",
        "Compression Variance (No Binomial)",
    ],
}
bpc_plot, loss_plot, compr_plot, var_plot = (
    melt_and_label(df_all, columns, metric)
    for metric, columns in _metric_columns.items()
)

# Bar annotation helper: labels each bar with its numeric value, rendered
# through an explicit format string.
def annotate_bars(ax, fmt: str = "{:.3f}") -> None:
    """Label every bar on *ax* with its value formatted through *fmt*.

    Args:
        ax: Axes-like object exposing ``containers`` and ``bar_label``.
        fmt: ``str.format`` template applied to each bar value.

    Containers that carry no ``datavalues`` are skipped: ``ax.containers``
    is not limited to bar containers (matplotlib also registers error-bar
    containers there), and those cannot be passed to ``bar_label``.
    """
    for container in ax.containers:
        values = getattr(container, "datavalues", None)
        if values is None:
            # Not a bar container; there is nothing to label.
            continue
        labels = [fmt.format(value) for value in values]
        ax.bar_label(container, labels=labels, label_type="edge", padding=3)

# Draw the 2x2 comparison grid: one metric per panel, bars grouped by setup
fig, axs = plt.subplots(2, 2, figsize=(14, 10))

# (grid position, long-form data, y column, panel title), in drawing order.
# NOTE(review): the variance panel is titled "↑ Better"; confirm that a higher
# compression variance really is the desired direction.
panels = [
    ((0, 0), bpc_plot, "Bits per Character", "Bits Per Character (↓ Better)"),
    ((0, 1), loss_plot, "Eval Loss", "Evaluation Loss (↓ Better)"),
    ((1, 0), compr_plot, "Compression Rate", "Compression Rate (↑ Better)"),
    ((1, 1), var_plot, "Compression Variance", "Compression Variance (↑ Better)"),
]
for position, frame, metric, title in panels:
    panel = axs[position]
    sns.barplot(data=frame, x="language", y=metric, hue="Setup", ax=panel)
    panel.set_title(title)
    annotate_bars(panel)

# Shared cosmetics, applied once every panel has been drawn.
for ax in axs.flat:
    ax.set_xlabel("Language")
    ax.legend(title="Setup", loc="best")

plt.tight_layout()
plt.show()
