from matplotlib import pyplot as plt
import seaborn as sns
from experiments.fns import get_project_data
from experiments.fns import do_coeff_analysis
from experiments.fns import get_baselines
from experiments.fns import rename_row
from experiments.fns import label_row

# Load every run of the "shuf_tree_cifar" project and keep only finished ones.
# The explicit .copy() detaches the filtered frame from the original, so the
# column assignments below do not trigger pandas' SettingWithCopyWarning.
df = get_project_data(project="shuf_tree_cifar")
df = df[df["state"] == "finished"].copy()
# Per-row values computed by project helpers (see experiments.fns for details).
df["vec"] = df.apply(rename_row, axis=1)
df["label"] = df.apply(label_row, axis=1)
# Metric used to rank the runs here and reused by the rest of the script.
target_var = "train_loss_avg"

do_coeff_analysis(df)
# Lowest target value first, then show the top 20 rows.
df = df.sort_values(by=target_var, ascending=True)
print(df.head(20))
print(f"There are: {len(df['vec'].unique()):,d} symbols")

# Hand-picked expressions, suffixed so they can be compared against df["vec"]
# (presumably the format produced by rename_row -- confirm in experiments.fns).
# The list is only consumed by the optional filter below.
exprs = [
    "eg,bdg,deg->bd",
    "fg,dfg,dg->d",
    "efg,dfg,adeg->ad",
    "f,bdf,d->bd",
    "eg,dg,adeg->ad",
]
exprs = [exp + " (BMM0) Adam" for exp in exprs]
# Uncomment to restrict the analysis to the hand-picked expressions above.
# df = df[df["vec"].isin(exprs)]

# Keep, for each (vec, width) pair, the single run with the lowest target value.
# Rows with a missing target can never be a group's minimum; dropping them
# first prevents idxmin from producing a missing label (and the .loc lookup
# from failing) when every run of a group lacks the metric.
df = df.dropna(subset=[target_var])
df = df.loc[df.groupby(["vec", "width"])[target_var].idxmin()]

# Baseline runs come from the "lr_baselines" project; only the variants whose
# "vec" value matches one of the names built below are kept.
dfb = get_baselines(project="lr_baselines", target_var=target_var)
exprs = [
    f"{name} (BMM0) Adam"
    for name in (
        "none",
        "0.0-0.5-0.5-0.0-0.5-0.0-0.5",
        "0.5-0.5-0.0-0.0-0.5-0.5-0.0",
    )
]
dfb = dfb.loc[dfb["vec"].isin(exprs)]

# Column plotted on the x-axis; swap the two lines to plot against FLOPs.
# x_var = "cola_flops"
x_var = "cola_params"
# Column that colours the non-baseline runs; "width" is the alternative.
hue = "flops/params"
# hue = "width"

sns.set(style="whitegrid", font_scale=2.0, rc={"lines.linewidth": 3.0})
sns.set_palette("Set2")
plt.figure(dpi=75, figsize=(25, 15))
# Baselines: one marker per run plus a connecting line per label.
sns.scatterplot(x=x_var, y=target_var, data=dfb, style="label", s=200)
# legend=False: the scatter call above already adds one legend entry per
# baseline label, so letting the line plot add its own would list each twice.
sns.lineplot(x=x_var, y=target_var, data=dfb, style="label", legend=False)
# Best run per (vec, width), coloured by `hue`.
sns.scatterplot(x=x_var, y=target_var, data=df, style="label", hue=hue, s=200)
plt.ylabel("Train Loss" if target_var.startswith("train_loss") else "Error")
plt.xlabel("FLOPs" if x_var == "cola_flops" else "Params")
plt.xscale('log')
plt.yscale('log')
# Anchor the legend just outside the top-right corner of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.show()
