# Generate paper-ready figures with matplotlib (no seaborn; one chart per plot; default colors).
import os
import numpy as np
import matplotlib.pyplot as plt

# Destination directory for every figure; created up front if it is missing.
out_dir = "."
os.makedirs(out_dir, exist_ok=True)

# 1) Operator entropy-reduction schematic: one bar each for H(X), H(X|π), H(X|π,C).
# The bar heights are hypothetical (arbitrary units), chosen only so that
# unconditional > prompt-conditioned > prompt+retrieval.
labels = ["H(X)", "H(X|π)", "H(X|π, C)"]
values = [5.0, 3.8, 2.9]

fig, ax = plt.subplots(figsize=(5, 3.2))
ax.bar(labels, values)
ax.set_ylabel("Entropy (bits/token)")
ax.set_title("Operator Effect on Conditional Entropy (Illustrative)")
fig.tight_layout()
# Write a 300-dpi raster copy and a PDF copy of the same figure.
fig.savefig(os.path.join(out_dir, "entropy_reduction.png"), dpi=300)
fig.savefig(os.path.join(out_dir, "entropy_reduction.pdf"))
plt.close(fig)

# 2) Best-of-N success curves, one line per base success probability p in {0.05, 0.1, 0.2}.
# For N independent samples that each hit with probability p,
# P(at least one hit) = 1 - (1 - p)^N.
N = np.arange(1, 51)

fig, ax = plt.subplots(figsize=(5.2, 3.4))
for base_p in (0.05, 0.10, 0.20):
    miss_all = (1 - base_p) ** N  # probability that all N samples miss
    ax.plot(N, 1 - miss_all, label=f"base p={base_p:.2f}")
ax.set_xlabel("N (independent samples)")
ax.set_ylabel("Success probability ≥1 hit")
ax.set_title("Best-of-N Improves Hit Probability (Analytic)")
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "best_of_n_success.png"), dpi=300)
fig.savefig(os.path.join(out_dir, "best_of_n_success.pdf"))
plt.close(fig)

# 3) Hallucination decomposition across three illustrative scenarios.
# Each scenario maps a tick label to a tuple (c, alpha, beta); the plotted bound is
#   HR >= (1-c)(1-alpha) + c*beta,
# drawn as two stacked bars, one per term of the sum.
scenarios = {
    "Low cov,\nno abst.": (0.3, 0.0, 0.10),
    "Med cov,\nmed abst.": (0.6, 0.3, 0.08),
    "High cov,\nlow beta": (0.9, 0.1, 0.02),
}
params = list(scenarios.values())
uncovered_err = [(1 - c) * (1 - a) for c, a, _ in params]
covered_err = [c * b for c, _, b in params]
# Total bound per scenario (the height of each stacked bar); the plot itself
# is built from the two terms above and does not read this list.
hr = [(1 - c) * (1 - a) + c * b for c, a, b in params]

x = np.arange(len(scenarios))
width = 0.5

fig, ax = plt.subplots(figsize=(5.2, 3.4))
ax.bar(x, uncovered_err, width, label="Uncovered error (1-c)(1-α)")
# Second series sits on top of the first to form the stack.
ax.bar(x, covered_err, width, bottom=uncovered_err, label="Covered error cβ")
ax.set_xticks(x)
ax.set_xticklabels(list(scenarios.keys()))
ax.set_ylabel("Lower bound on hallucination risk HR")
ax.set_title("Hallucination Risk Decomposition (Illustrative)")
ax.legend()
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "hallucination_decomposition.png"), dpi=300)
fig.savefig(os.path.join(out_dir, "hallucination_decomposition.pdf"))
plt.close(fig)

# 4) Greedy vs. random selection on a synthetic coverage utility (diminishing returns).
np.random.seed(0)  # fixed seed so the synthetic instance is the same on every run
m = 100  # number of candidate items
F = 20  # number of latent features
k_vals = np.arange(1, 31)  # selection budgets k = 1..30

# Synthetic submodular utility: an item covers a latent feature when its uniform
# draw falls below 0.15; the utility of a set is the count of covered features.
draws = np.random.rand(m, F)
item_feats = (draws < 0.15).astype(int)

def greedy_curve(kmax=30, feats=None):
    """Return the cumulative coverage achieved by greedy selection.

    At every step the not-yet-selected item that covers the most currently
    uncovered features is picked. Ties go to the lowest item index. Items are
    still picked when their marginal gain is zero, and once every item has
    been used the remaining steps contribute zero gain.

    Args:
        kmax: Number of selection steps (length of the returned curve).
        feats: Binary (items x features) matrix; entry 1 means the item covers
            the feature. Defaults to the module-level ``item_feats``.

    Returns:
        1-D array of length ``kmax`` whose entry ``k-1`` is the number of
        features covered after ``k`` greedy picks.
    """
    if feats is None:
        feats = item_feats
    covers = np.asarray(feats) == 1
    n_items, n_feats = covers.shape

    uncovered = np.ones(n_feats, dtype=bool)
    available = np.ones(n_items, dtype=bool)
    gains = []
    for _ in range(kmax):
        if not available.any():
            # Budget exceeds the number of items: nothing left to add.
            gains.append(0)
            continue
        # Marginal gain of every item against the current uncovered set.
        marginal = (covers & uncovered).sum(axis=1)
        # Already-selected items must never win, even against zero-gain items.
        marginal[~available] = -1
        # argmax returns the first maximum, i.e. the lowest index among ties.
        best_item = int(np.argmax(marginal))
        gains.append(int(marginal[best_item]))
        uncovered &= ~covers[best_item]
        available[best_item] = False
    return np.cumsum(gains)

def random_curve(kmax=30, trials=50, feats=None):
    """Return the mean cumulative coverage of uniformly random selection.

    Each trial draws one random permutation of the items from NumPy's global
    random state and selects items in that order. If ``kmax`` exceeds the
    number of items, the curve stays flat once every item has been selected
    instead of indexing past the end of the permutation.

    Args:
        kmax: Number of selection steps (length of the returned curve).
        trials: Number of random permutations to average over.
        feats: Binary (items x features) matrix; entry 1 means the item covers
            the feature. Defaults to the module-level ``item_feats``.

    Returns:
        1-D float array of length ``kmax`` whose entry ``k-1`` is the number
        of features covered after ``k`` random picks, averaged over trials.
    """
    if feats is None:
        feats = item_feats
    covers = np.asarray(feats) == 1
    n_items = covers.shape[0]
    steps = min(kmax, n_items)  # picks that can actually draw a fresh item

    totals = np.zeros(kmax)
    for _ in range(trials):
        # One permutation per trial, so the random stream is consumed the same
        # way regardless of kmax.
        order = np.random.permutation(n_items)[:steps]
        if steps == 0:
            continue
        # Running OR down the pick order gives the covered set after each pick;
        # its row sums are the cumulative coverage counts.
        covered_after = np.logical_or.accumulate(covers[order], axis=0).sum(axis=1)
        totals[:steps] += covered_after
        # Past the last available item there is no further gain.
        totals[steps:] += covered_after[-1]
    return totals / trials

# Evaluate both strategies with one step per budget in k_vals:
# greedy first, then the random baseline.
n_budgets = len(k_vals)
greedy = greedy_curve(n_budgets)
rand = random_curve(n_budgets)

fig, ax = plt.subplots(figsize=(5.2, 3.4))
ax.plot(k_vals, greedy, label="Greedy (≈ submodular proxy)")
ax.plot(k_vals, rand, label="Random")
ax.set_xlabel("k (retrieved items)")
ax.set_ylabel("Utility (coverage proxy)")
ax.set_title("Greedy Retrieval Approaches Near-Optimal Utility (Illustrative)")
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "submodular_greedy_gain.png"), dpi=300)
fig.savefig(os.path.join(out_dir, "submodular_greedy_gain.pdf"))
plt.close(fig)

# 5) Energy to first hit versus per-sample success probability.
# With a constant energy E per sample, the plotted quantity is E_total = E / p.
E_per_sample = 1.0  # arbitrary units
p = np.linspace(0.01, 0.5, 200)
E_total = E_per_sample / p

fig, ax = plt.subplots(figsize=(5.2, 3.4))
ax.plot(p, E_total)
# NOTE(review): the x-label contains U+1D4AA (𝒪); confirm the font used for
# rendering provides that glyph.
ax.set_xlabel("Success probability p_f(𝒪)")
ax.set_ylabel("Expected energy to first hit (arbitrary units)")
ax.set_title("Energy per Hit Lower Bound: E/p (Illustrative)")
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "energy_per_hit.png"), dpi=300)
fig.savefig(os.path.join(out_dir, "energy_per_hit.pdf"))
plt.close(fig)

print("Figures written to:", out_dir)
