import re
import numpy as np
import pandas as pd

# Load the raw measurement log and cut it into one text segment per experiment.
# Segments are delimited by the literal marker "((.env) )" (presumably the
# shell prompt of the virtualenv the runs were launched from -- TODO confirm).
# Everything before the first marker is discarded by the [1:] slice.
with open("trueskill_energy_measurements.txt") as measurements_file:
    # The context manager closes the handle even if reading fails.
    all_data = measurements_file.read().split("((.env) )")[1:]


def parse_experiment(data):
    """Extract the configuration and summary statistics of one experiment.

    Args:
        data: Text of a single experiment segment. It must contain a command
            line of the form ``<script>.py <method> ... --distribution <d>
            ... --sampler <s>``; because ``.`` does not match newlines, the
            method token and both flags have to sit on the same line.

    Returns:
        A dict with the keys ``distribution``, ``method`` (formatted as
        ``"<method>_<sampler>"``), ``sampling_runtimes_mean`` and
        ``sampling_runtimes_std`` (population standard deviation), plus one
        ``"<source>_mean"`` / ``"<source>_std"`` pair for every energy line
        of the form ``<value> J <source> ( +- <pct>% )``.

    Raises:
        ValueError: If no matching command line is found in ``data``.
    """
    header = re.search(
        r"\.py\s+(\S+).*?--distribution\s+(\S+).*?--sampler\s+(\S+)", data
    )
    if header is None:
        # Previously this case fell through and crashed with an opaque
        # UnboundLocalError when building the result dict.
        raise ValueError(
            "could not find '<script>.py <method> --distribution <d> "
            f"--sampler <s>' in experiment output: {data[:80]!r}"
        )
    method, distribution, sampler = header.groups()

    sampling_runtimes = [
        float(x)
        for x in re.findall(r"Sampling-only runtime:\s*([\d.]+)", data)
    ]
    if sampling_runtimes:
        runtime_mean = np.mean(sampling_runtimes)
        runtime_std = np.std(sampling_runtimes)
    else:
        # np.mean/np.std of an empty list also give NaN, but emit a
        # RuntimeWarning; return NaN directly instead.
        runtime_mean = runtime_std = float("nan")

    pattern = r"([\d.]+)\s+J\s+(\S+)\s+\( \+-\s+([\d.]+)% \)"
    energy_stats = {}
    for value, source, deviation in re.findall(pattern, data):
        energy_stats[f"{source}_mean"] = float(value)
        # The log reports the spread as a percentage of the value; convert
        # it to the same unit as the value itself.
        energy_stats[f"{source}_std"] = float(deviation) / 100 * float(value)

    return {
        "distribution": distribution,
        "method": f"{method}_{sampler}",
        "sampling_runtimes_mean": runtime_mean,
        "sampling_runtimes_std": runtime_std,
        **energy_stats,
    }

# Parse every experiment segment into one row of summary statistics.
results = [parse_experiment(experiment) for experiment in all_data]

df = pd.DataFrame(results)
# Only the experiments run with the "bimodal" distribution are exported.
df = df[df["distribution"] == "bimodal"]
# Give the "mcp:dev0ch1" energy columns shorter names.
df = df.rename(columns={
    "mcp:dev0ch1_mean": "mcp_mean",
    "mcp:dev0ch1_std": "mcp_std",
})
# Every method value has the form "<method>_<sampler>", so a whole-value
# Series.replace({"clut": "cLUT"}) could never match. Replace the literal
# substring instead so that e.g. "clut_x" becomes "cLUT_x".
df["method"] = df["method"].str.replace("clut", "cLUT", regex=False)

print(df.columns)

df.to_csv("data/trueskill_results.csv")
