import marimo

# Version stamp written into the file — presumably the marimo release that
# generated this notebook (confirm against marimo's file-format docs).
__generated_with = "0.12.10"
# The notebook application object; every cell below registers itself on it
# through the `@app.cell` decorator.
app = marimo.App(width="medium")


@app.cell
def _():
    # Shared imports for the notebook. Every name returned below is made
    # available to the other cells, so none of them is removed here even
    # when a given cell does not use it.

    # Standard library.
    from dataclasses import asdict

    # Third-party.
    import altair as alt
    import marimo as mo
    import polars as pl

    # Project-local plotting helpers.
    from notebooks import plotting_utils as pu

    return alt, asdict, mo, pl, pu


@app.cell
def _(pu):
    # Load the evaluation sweep results and fold the per-hop columns into a
    # single ("Hop", "Accuracy") pair that the plotting cell encodes.
    from configs.llama_3_3b.evaluation import sweep

    # Fetch the sweep output first; the leading underscore keeps this
    # intermediate value private to the cell.
    _raw_results = sweep.results()

    # One string per entry of `pu.hop_labels`: the parts of each key joined
    # with "_".
    hop_keys = list(map("_".join, pu.hop_labels.keys()))

    # NOTE(review): `merge_columns` presumably reshapes the `hop_keys`
    # columns into long form under "Hop" / "Accuracy" — confirm in
    # `plotting_utils`.
    results = pu.merge_columns(
        _raw_results,
        hop_keys,
        key="Hop",
        value="Accuracy",
    )
    return hop_keys, results, sweep


@app.cell
def _(alt, results):
    # Bar chart of Accuracy per task_name, with bars coloured and offset
    # along x by Hop.
    _bars = alt.Chart(results).mark_bar()

    # Left as a bare trailing expression (not assigned) so that it remains
    # the cell's last expression, which is what the notebook displays.
    _bars.encode(
        x="task_name",
        y="Accuracy",
        color="Hop",
        xOffset="Hop",
    )
    return


# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
