"""
Plot, per review type and rating, the fraction of generated reviews that match the intended keyword structure.
"""
import sqlite3

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm import tqdm

from config import DB_PATH
from dataset_metrics.extract_info_from_reviews import RELAXED_SCORES, biases, get_has_intended_structure

# Tally of generated reviews that match the intended structure, keyed first by
# review type and then by rating. Every counter starts at zero.
summary_rev_histograms = {
    review_type: dict.fromkeys(RELAXED_SCORES, 0)
    for review_type in ("positive", "negative", "neutral")
}

# Load every generated review plus the number of stored reviews for each
# (type, rating) pair.
# Using the sqlite3 connection as a context manager only commits or rolls back
# the transaction; it does not close the connection. Close it explicitly so the
# database handle is released as soon as the reads are done.
connection = sqlite3.connect(str(DB_PATH))
try:
    all_genai_reviews = pd.read_sql_query("select * from genai_review", connection)
    query_count = "select count(*) from genai_review where rating = ? and type=?"
    n_reviews_per_rating = {
        review_type: {
            s: connection.execute(query_count, [s, review_type]).fetchone()[0]
            for s in RELAXED_SCORES
        }
        for review_type in ("positive", "negative", "neutral")
    }
finally:
    connection.close()

# Count, per type and rating, the reviews whose generated text has the intended
# structure. iterrows() is a generator, so tqdm needs an explicit total to show
# a real progress bar.
for _, genai_review in tqdm(all_genai_reviews.iterrows(), total=len(all_genai_reviews)):
    bias = genai_review['type']
    rating = genai_review['rating']
    generated = genai_review['generated']
    match_structure = get_has_intended_structure(generated)
    summary_rev_histograms[bias][rating] += int(match_structure)
# Grouped bar chart: one group per rating, one bar per bias type. Each bar is
# the share of that (type, rating) bucket whose generated review matches the
# intended structure.
plt.figure()
x = np.arange(len(RELAXED_SCORES))
width = 0.2
n_biases = len(biases)
for i, bias in enumerate(biases):
    # max(1, ...) guards against dividing by zero for buckets with no reviews.
    matching_fractions = [
        summary_rev_histograms[bias][k] / max(1, n_reviews_per_rating[bias][k])
        for k in RELAXED_SCORES
    ]
    plt.bar(x + i * width, matching_fractions, width=width, label=bias)
# Centre each tick label under its group of bars.
plt.xticks(x + width * (n_biases - 1) / 2, RELAXED_SCORES, rotation=90)
plt.legend()
# The bars are match counts divided by review counts, i.e. fractions.
plt.title("Fraction of reviews that match the keyword structure")
plt.tight_layout()
plt.show()
