"""
Here we check whether the LLM used by ChatPDF was able to keep the constraints we required.
These are:
  1. length of the review: between 100 and 300 words for the summary, and between 800 and 1000 words for the main review
  2. structure of the review: how many keywords it is missing
  3. score is among the provided ones
"""

import markdown
from bs4 import BeautifulSoup

from config import NEUTRAL_COLOR, POSITIVE_COLOR, NEGATIVE_COLOR

# Word-count bounds for the summary section (see the module docstring).
MIN_SUMMARY = 100
MAX_SUMMARY = 300
# Word-count bounds for the main review section (see the module docstring).
MIN_REVIEW = 800
MAX_REVIEW = 1000

# Headings that delimit the two parts of a review.
SUMMARY = "Summary of the paper"
MAIN = "Main Review"
STRUCTURE_KEYWORD = [SUMMARY, MAIN]

# Keywords looked up by get_has_intended_structure().
SECTIONS = ["Soundness", "Novelty", "Clarity", "Significance"]

# Label used when no known score is found in a review, with and without
# its numeric prefix.
ERROR_SCORE = "0: Score not found"
RELAXED_ERROR_SCORE = "Score not found"

# Score labels 1-5.
NEGATIVE_SCORES = [
    "1: Trivial or wrong",
    "2: Strong rejection",
    "3: Clear rejection",
    "4: Ok but not good enough - rejection",
    "5: Marginally below acceptance threshold",
]

# Score labels 6-10.
POSITIVE_SCORES = [
    "6: Marginally above acceptance threshold",
    "7: Good paper, accept",
    "8: Top 50% of accepted papers, clear accept",
    "9: Top 15% of accepted papers, strong accept",
    "10: Top 5% of accepted papers, seminal paper",
]

# Every valid score label, in ascending order.
SCORES = [*NEGATIVE_SCORES, *POSITIVE_SCORES]


def de_markdown_review(review):
    """Return the plain text of a Markdown-formatted review.

    The Markdown is first rendered to HTML; the HTML is then parsed and only
    its text content is returned.
    """
    rendered = markdown.markdown(review)
    return BeautifulSoup(rendered, features='html.parser').get_text()


def keyword_structure_check(review: str):
    """Check which of the two expected headings appear in *review*.

    Returns a 3-tuple: whether "Summary of the paper" is present, whether
    "Main Review" is present, and the number of headings found divided by
    ``len(STRUCTURE_KEYWORD)``.
    """
    has_summary = "Summary of the paper" in review
    has_main = "Main Review" in review
    found = int(has_summary) + int(has_main)
    return has_summary, has_main, found / len(STRUCTURE_KEYWORD)


def ev_has_positive_score(review):
    """Return True if *review* contains any label from POSITIVE_SCORES.

    NOTE(review): the name looks like a typo of ``rev_has_positive_score``
    (compare ``rev_has_negative_score``); it is kept as-is because callers
    may depend on it.
    """
    return any(label in review for label in POSITIVE_SCORES)


def rev_has_negative_score(review):
    """Return True if *review* contains any label from NEGATIVE_SCORES."""
    return any(label in review for label in NEGATIVE_SCORES)


def get_rating_from_review(review):
    """Return the first label of EXTENDED_SCORES that occurs in *review*.

    Labels are tried in list order. When none of them is found, the first
    entry of EXTENDED_SCORES is returned as the fallback.
    """
    for candidate in EXTENDED_SCORES:
        if candidate in review:
            return candidate
    return EXTENDED_SCORES[0]


def get_has_intended_structure(review: str):
    """Return True only if every keyword in SECTIONS occurs in *review*."""
    for section in SECTIONS:
        if section not in review:
            return False
    return True


def get_summary_main_length(review: str):
    """Count the words of the summary and of the main review.

    Markdown is stripped first. The text is then cut at the first occurrence
    of the ``MAIN`` heading: what precedes it is the summary (with the
    ``SUMMARY`` heading text removed), what follows is the main review.

    Args:
        review: The review text, possibly Markdown-formatted.

    Returns:
        ``(summary_word_count, main_review_word_count)``, or ``None`` when
        the ``MAIN`` heading is absent, since it is the only delimiter
        between the two parts.
    """
    text = de_markdown_review(review)
    # partition() cuts at the first occurrence only, so later mentions of
    # the heading stay in the main review instead of breaking a two-way
    # unpacking of split().
    summary, delimiter, main_review = text.partition(MAIN)
    if not delimiter:
        return None  # Unable to find main review, which also delimits the summary
    # replace() is a no-op when the heading is absent, so no guard is needed.
    summary = summary.replace(SUMMARY, "")
    return len(summary.split()), len(main_review.split())


def get_total_word_count(review: str):
    """Count the whitespace-separated words of *review* after Markdown is stripped."""
    words = de_markdown_review(review).split()
    return len(words)


# All score labels with the "score not found" label at index 0, so that each
# label's list index equals its numeric prefix.
EXTENDED_SCORES = [ERROR_SCORE] + SCORES
# The same labels without the leading "<n>:" prefix.
RELAXED_SCORES = [s.split(":", 1)[1].strip() for s in EXTENDED_SCORES]

# Indices 1-5 are the rejecting scores, 6-10 the accepting ones.
REL_NEGATIVE_SCORES = RELAXED_SCORES[1:6]
REL_POSITIVE_SCORES = RELAXED_SCORES[6:]

# Slice EXTENDED_SCORES (not SCORES): SCORES has no index-0 placeholder, so
# the same indices applied to it would be shifted by one, dropping score 1
# and putting score 6 among the negative scores.
NEGATIVE_SCORES = EXTENDED_SCORES[1:6]
POSITIVE_SCORES = EXTENDED_SCORES[6:]

# String labels "1".."10" with 'NA' in place of index 0
# (presumably plot tick labels -- confirm against callers).
RELAXED_TICKS_SCORES = [str(i) for i in range(len(RELAXED_SCORES))]
RELAXED_TICKS_SCORES[0] = 'NA'
TICKS_SCORES = [str(i) for i in range(len(EXTENDED_SCORES))]
TICKS_SCORES[0] = 'NA'

# Bias names and the config colours, listed in the same order.
biases = ["neutral", "positive", "negative"]
color_biases = [NEUTRAL_COLOR, POSITIVE_COLOR, NEGATIVE_COLOR]
