import json
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


# JSON Lines input: one record per line, each carrying "left"/"right" objects
# with a "finish" value and a top-level "timestamp".
infile = "output.jsonl"
# Label interpolated into the plot title and the output file name.
date = "2024-03"

# Per-record gap between the later of the two "finish" values and "timestamp".
# NOTE(review): the plot labels these as seconds — confirm the source units.
durations = []

with open(infile, encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at end of file)
        # instead of crashing inside json.loads.
        if not line.strip():
            continue
        data = json.loads(line)
        left_finish = data["left"]["finish"]
        right_finish = data["right"]["finish"]
        voted_at = data["timestamp"]
        durations.append(voted_at - max(left_finish, right_finish))

# np.max raises an opaque ValueError on an empty sequence; fail with a clear
# message instead.
if not durations:
    raise SystemExit(f"No records found in {infile!r}; nothing to plot.")

print(
    f"Avg: {np.mean(durations)}, Median: {np.median(durations)}, Max: {np.max(durations)}"
)

# Values above `cutoff` are collapsed into a single extra "overflow" bin so
# that a long tail does not stretch the x-axis.
cutoff = 200.0
num_bins_inside_cutoff = 20

bin_width = cutoff / num_bins_inside_cutoff
# Right edge of the overflow bin: one regular bin width past the cutoff.
overflow_cap = cutoff + bin_width
# Overflowing values are moved to the centre of the overflow bin.
overflow_center = cutoff + 0.5 * bin_width

durations = [overflow_center if d > cutoff else d for d in durations]

# Equal-width bins on [0, cutoff], plus the overflow bin [cutoff, overflow_cap].
bin_edges = np.append(
    np.linspace(0, cutoff, num_bins_inside_cutoff + 1), overflow_cap
)

sns.histplot(durations, bins=bin_edges, kde=False)
plt.title(f'Distribution of "time to vote" {date}')
plt.xlabel("Duration (seconds)")
plt.ylabel("Frequency")

# Mark where the regular bins end and the overflow bin begins.
plt.axvline(x=cutoff, color="red", linestyle="--")
plt.text(
    cutoff + 1, plt.ylim()[1] * 0.9, "Overflow", color="red", ha="left"
)

# Blank the tick label at the overflow bin's right edge, if a tick lands
# there: that position is an artificial edge, not a real duration value.
# The edge is derived from `overflow_cap` rather than hard-coded so it stays
# correct when `cutoff` or the bin count changes.
ax = plt.gca()
xlim = ax.get_xlim()
ticks = ax.get_xticks()
labels = [
    "" if np.isclose(tick, overflow_cap) else label.get_text()
    for tick, label in zip(ticks, ax.get_xticklabels())
]
# Pin the tick positions before assigning fixed labels so the labels cannot
# drift out of sync with the ticks; then restore the view limits, because
# set_xticks may widen them to include ticks outside the visible range.
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.set_xlim(xlim)

plt.tight_layout()

plt.savefig(f"duration_distribution_time_to_vote_{date}.png", dpi=300)
