import pandas as pd

# Load the results table from disk.
df = pd.read_csv('data/results_3107.csv')

# 1. Keep only rows whose N lies in the closed interval [1e6, 1e7]
mask = df["N"].between(1e6, 1e7)
filtered = df.loc[mask]

# 2. Per-sampler mean and standard deviation of both timing columns,
#    together with the number of rows that went into each group.
aggregations = {
	"sampling_time_mean": pd.NamedAgg(column="sampling_time", aggfunc="mean"),
	"sampling_time_std": pd.NamedAgg(column="sampling_time", aggfunc="std"),
	"preprocessing_time_mean": pd.NamedAgg(column="preprocessing_time", aggfunc="mean"),
	"preprocessing_time_std": pd.NamedAgg(column="preprocessing_time", aggfunc="std"),
	"count": pd.NamedAgg(column="sampler", aggfunc="count"),
}
per_sampler = filtered.groupby("sampler").agg(**aggregations)
# Turn the "sampler" index back into an ordinary column.
grouped = per_sampler.reset_index()

# 3. Format each time as mean ± std with desired precision
def fmt(mu, sigma, precision=3):
	r"""Format a mean and standard deviation as an inline LaTeX math string.

	Args:
		mu: Mean value. If it is missing (NaN/None), the placeholder "-" is
			returned.
		sigma: Standard deviation. If it is missing (for example, the sample
			standard deviation of a single observation is NaN), only the
			mean is shown instead of emitting "nan" into the table.
		precision: Number of digits after the decimal point for both values.

	Returns:
		"-" when ``mu`` is missing, ``$mu$`` when only ``sigma`` is missing,
		otherwise ``$mu\pm sigma$`` (without the space), with both numbers in
		fixed-point notation.
	"""
	if pd.isna(mu):
		return "-"
	if pd.isna(sigma):
		# No spread available: report the mean alone rather than "\pm nan".
		return rf"${mu:.{precision}f}$"
	return rf"${mu:.{precision}f}\pm{sigma:.{precision}f}$"

# Build the display columns by iterating over the (mean, std) pairs directly
# instead of DataFrame.apply(axis=1): on an empty frame, apply returns a
# DataFrame rather than a Series, which cannot be assigned to a single column.
# A list comprehension simply yields an empty column in that case.
grouped["Sampling time"] = [
	fmt(mean, std)
	for mean, std in zip(grouped["sampling_time_mean"], grouped["sampling_time_std"])
]
grouped["Preprocessing time"] = [
	fmt(mean, std)
	for mean, std in zip(
		grouped["preprocessing_time_mean"], grouped["preprocessing_time_std"]
	)
]

# 4. Build LaTeX table
def _escape_latex_text(text):
	r"""Backslash-escape ``& % # _`` in *text* for use in a LaTeX table cell.

	The value is converted with ``str`` first, so non-string labels work too.
	Occurrences that are already escaped (e.g. ``\_``) are normalised before
	escaping, so applying the function twice gives the same result as once.
	Other LaTeX syntax (``$``, ``{``, ``}``, ``\``) is left untouched so that
	deliberately marked-up names still render.
	"""
	escaped = str(text)
	for special in "&%#_":
		escaped = escaped.replace("\\" + special, special)
		escaped = escaped.replace(special, "\\" + special)
	return escaped


# Preamble of the floating table. \toprule, \midrule and \bottomrule come from
# the LaTeX "booktabs" package, which the including document must load.
lines = [
	r"\begin{table}[t]",
	r"\centering",
	r"\caption{Sampling and preprocessing times (mean $\pm$ std) for each method on the subset with $N\in[10^6,10^7]$.}",
	r"\label{tab:time_summary}",
	r"\begin{tabular}{lcc}",
	r"\toprule",
	r"Method & Sampling time (s) & Preprocessing time (s) \\",
	r"\midrule",
]

# One table row per sampler. The method name is escaped because raw names such
# as "alias_table" would otherwise break LaTeX compilation; the two time cells
# are already valid LaTeX produced by the formatting step.
lines.extend(
	f"{_escape_latex_text(method)} & {samp} & {prep} \\\\"
	for method, samp, prep in zip(
		grouped["sampler"], grouped["Sampling time"], grouped["Preprocessing time"]
	)
)

lines.append(r"\bottomrule")
lines.append(r"\end{tabular}")
lines.append(r"\end{table}")

latex_table = "\n".join(lines)
print(latex_table)
