"""
Compute and plot the distributions of the Binoculars scores on the Gen-Review dataset
"""
import sqlite3

import numpy as np
from matplotlib import pyplot as plt
from binoculars.detector import BINOCULARS_FPR_THRESHOLD
from config import DB_PATH
# Raise matplotlib's global default font size; this runs at import time and
# affects every figure drawn afterwards in this process.
plt.rcParams.update({'font.size': 18})

def get_scores_human_ai():
    """Load the Binoculars scores stored in the SQLite database at ``DB_PATH``.

    Two queries are run:

    * every ``binocular_score`` in the ``review`` table (returned as the
      "human" scores);
    * every ``binocular_score`` in the ``genai_review`` table whose ``type``
      is ``'neutral'`` (returned as the "AI" scores).

    Side effect: a new matplotlib figure is opened before the database is
    read, so plotting calls made by the caller afterwards draw on that figure.

    Returns:
        A ``(scores_human, scores_ai)`` pair of flattened NumPy arrays, one
        element per row returned by the corresponding query.
    """
    query_human_scores = """
        select binocular_score from review
    """

    query_ai_scores = """
        select binocular_score from genai_review where type='neutral'
    """

    plt.figure()

    # ``with sqlite3.connect(...)`` only commits/rolls back the transaction and
    # leaves the connection open, so close it explicitly once both reads are done.
    connection = sqlite3.connect(str(DB_PATH))
    try:
        scores_human = connection.execute(query_human_scores).fetchall()
        scores_ai = connection.execute(query_ai_scores).fetchall()
    finally:
        connection.close()

    # fetchall() yields one 1-tuple per row; flatten to a 1-D array of scores.
    # NOTE(review): a NULL score would come back as None and produce an object
    # array -- confirm the column is never NULL.
    scores_human = np.array(scores_human).flatten()
    scores_ai = np.array(scores_ai).flatten()

    return scores_human, scores_ai

if __name__ == '__main__':
    # Script entry point: overlay the two score distributions and mark the
    # detector's decision threshold.
    human_values, genai_values = get_scores_human_ai()

    # Each population gets a density-normalised 100-bin histogram, drawn as a
    # step outline (carrying the legend label) plus a translucent fill.
    populations = (
        ('Human', 'blue', human_values),
        ('GenAI', 'orange', genai_values),
    )
    for legend_label, fill_color, values in populations:
        density, edges = np.histogram(values, bins=100, density=True)
        plt.stairs(density, edges, label=legend_label)
        plt.bar(
            edges[:-1],
            density,
            width=np.diff(edges),
            align='edge',
            alpha=0.2,
            color=fill_color,
        )

    # Dashed vertical line at the imported Binoculars threshold constant.
    plt.axvline(
        x=BINOCULARS_FPR_THRESHOLD,
        color='red',
        linestyle='--',
        label="threshold",
    )

    plt.xlabel("Binocular score")
    plt.ylabel("Density")
    # Anchor the three-column legend above the axes.
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.30), ncol=3)
    plt.tight_layout()
    plt.show()
