from statsmodels.stats.proportion import proportion_confint


def compute_intervals_margin(name, tp, fp, alpha=0.05):
    """Print the observed false discovery rate with a Wilson-based margin.

    The false discovery rate (FDR) is ``fp / (tp + fp)``. The printed margin
    is half the width of the Wilson score interval for that proportion.

    Note: the Wilson interval is generally not centred on the observed
    proportion, so ``FDR ± margin`` is a compact summary of the interval's
    width rather than the interval itself.

    Args:
        name: Label printed in front of the result.
        tp: Number of true positives (must be non-negative).
        fp: Number of false positives (must be non-negative).
        alpha: Significance level forwarded to ``proportion_confint``; the
            default of 0.05 corresponds to a 95% interval.

    Raises:
        ValueError: If a count is negative, or if ``tp + fp`` is zero (the
            FDR is undefined when there are no predicted positives).
    """
    if tp < 0 or fp < 0:
        raise ValueError(
            f"{name}: tp and fp must be non-negative, got tp={tp}, fp={fp}"
        )

    n = fp + tp  # total predicted positives
    if n == 0:
        # Fail with a descriptive message instead of a bare ZeroDivisionError.
        raise ValueError(
            f"{name}: FDR is undefined when tp + fp == 0 (no predicted positives)"
        )

    # Point estimate
    fdr = fp / n

    # Wilson interval; only its half-width is reported
    ci_low, ci_high = proportion_confint(fp, n, alpha=alpha, method='wilson')
    margin = (ci_high - ci_low) / 2

    print(f"{name} -> FDR = {fdr:.3f} ± {margin:.3f}")


def compute_intervals(name, tp, fp):
    """Print the observed FDR and its 95% Wilson score interval, in percent.

    NOTE(review): a second ``compute_intervals(name, successes, n)`` is
    defined further down in this file and rebinds the name, so this version
    is not reachable by name once the whole module has executed.

    Args:
        name: Method label printed on the first line.
        tp: Number of true positives (must be non-negative).
        fp: Number of false positives (must be non-negative).

    Raises:
        ValueError: If a count is negative, or if ``tp + fp`` is zero (the
            FDR is undefined when there are no predicted positives).
    """
    if tp < 0 or fp < 0:
        raise ValueError(
            f"{name}: tp and fp must be non-negative, got tp={tp}, fp={fp}"
        )

    n = fp + tp  # total predicted positives
    if n == 0:
        # Reject up front rather than computing an interval on zero
        # observations and then failing on the division below.
        raise ValueError(
            f"{name}: FDR is undefined when tp + fp == 0 (no predicted positives)"
        )

    alpha = 0.05  # 95% confidence level, matching the label printed below

    # Wilson score interval for FDR
    lower, upper = proportion_confint(fp, n, alpha=alpha, method='wilson')

    print(f"Method: {name}")
    print(f"Observed FDR: {100* fp / n:.2f}")
    print(f"95% CI for FDR (Wilson): [{100*lower:.2f}, {100*upper:.2f}]")


# FDR report inputs. Each row is (label, tp, fp), in the positional order
# expected by compute_intervals_margin.
_LC_RUNS = (
    ("LC BG", 48, 100),
    ("LC BW", 54, 294),
    ("LC TF Const", 162, 120),
    ("LC TF LLM", 196, 112),
    ("LC TF", 144, 26),
)
_COMP_RUNS = (
    ("Comp BG", 51, 191),
    ("Comp BW", 98, 405),
    ("Comp TF Const", 172, 161),
    ("Comp TF LLM", 198, 177),
    ("Comp TF", 127, 76),
)

for _label, _tp, _fp in _LC_RUNS:
    compute_intervals_margin(_label, _tp, _fp)

# Separator between the "LC" and "Comp" groups
print("=========================")
for _label, _tp, _fp in _COMP_RUNS:
    compute_intervals_margin(_label, _tp, _fp)


from statsmodels.stats.proportion import proportion_confint

def compute_intervals(name, successes, n):
    """Print the 95% Wilson score interval for a pass rate.

    NOTE(review): this definition rebinds ``compute_intervals``; an earlier
    function with the same name but a ``(name, tp, fp)`` signature appears
    above in this file.

    Args:
        name: Label printed in front of the interval.
        successes: Number of passing trials; must satisfy
            ``0 <= successes <= n``.
        n: Total number of trials; must be positive.

    Raises:
        ValueError: If ``n`` is not positive or ``successes`` lies outside
            ``[0, n]``, since no proportion can be formed from such counts.
    """
    if n <= 0:
        raise ValueError(f"{name}: n must be positive, got {n}")
    if not 0 <= successes <= n:
        raise ValueError(
            f"{name}: successes must be within [0, {n}], got {successes}"
        )

    # Wilson interval
    ci_low, ci_high = proportion_confint(count=successes, nobs=n, alpha=0.05, method='wilson')

    # Only the interval bounds are printed, as proportions in [0, 1]
    print(f"{name} | Pass rate: [{ci_low:.2f}, {ci_high:.2f}]")


# Pass-rate report inputs. Each section pairs the header line to print with
# its rows of (label, successes, n), in the positional order expected by
# compute_intervals.
_PASS_RATE_SECTIONS = (
    (
        "===========Terminal===========",
        (
            ("Benchmark Baseline", 77, 256),
            ("Benchmark TF", 101, 256),
            ("Benchmark DRAFT", 99, 256),
            ("Benchmark TF + DRAFT", 83, 256),
        ),
    ),
    (
        "===========FMTB===========",
        (
            ("Benchmark Baseline", 86, 256),
            ("Benchmark TF", 105, 256),
            ("Benchmark DRAFT", 92, 256),
            ("Benchmark TF + DRAFT", 85, 256),
        ),
    ),
    (
        "===========GHB===========",
        (
            ("Benchmark Baseline", 11, 48),
            ("Benchmark TF", 17, 48),
            ("Benchmark DRAFT", 14, 48),
            ("Benchmark TF + DRAFT", 12, 48),
        ),
    ),
)

for _header, _rows in _PASS_RATE_SECTIONS:
    print(_header)
    for _row in _rows:
        compute_intervals(*_row)