import re
from pathlib import Path

import pandas as pd


# Directory containing this script; every other path is anchored here so the
# script behaves the same regardless of the current working directory.
BASE_DIR = Path(__file__).resolve().parents[0]

# Input CSV inside the dataset folder that sits next to this script.
CSV_PATH = BASE_DIR.joinpath(
    "kaggles-all-completed-competition-dataset",
    "source",
    "kaggle comp_submission.csv",
)

# Text file that receives the final count when the script is run directly.
RESULT_PATH = BASE_DIR.joinpath("result.txt")


def _parse_start_dt(df: pd.DataFrame) -> pd.Series:
    """Assemble one start timestamp per row from the split date columns.

    Reads the ``start_year``, ``start_month`` and ``start_date`` columns of
    *df*. ``start_month`` is parsed as an abbreviated month name (``%b``)
    after surrounding whitespace is stripped; ``start_date`` is used as the
    day of the month.

    Returns a Series of timestamps. Because both parsing steps use
    ``errors="coerce"``, a row whose month name cannot be parsed or whose
    year/month/day combination is not a valid date yields ``NaT`` rather
    than raising.
    """
    # The dataset pads its abbreviated month names (e.g. "Sep "), so trim
    # them before handing them to the "%b" parser.
    cleaned_months = df["start_month"].astype(str).str.strip()
    parsed_months = pd.to_datetime(cleaned_months, format="%b", errors="coerce")

    # pandas assembles datetimes from a frame with year/month/day columns.
    date_parts = pd.DataFrame(
        {
            "year": df["start_year"],
            "month": parsed_months.dt.month,
            "day": df["start_date"],
        }
    )
    return pd.to_datetime(date_parts, errors="coerce")


def _build_keyword_regex() -> re.Pattern:
    """Compile a single case-insensitive pattern covering every keyword.

    Each keyword is written as its own word-bounded fragment that tolerates
    simple variants (optional plural "s", optional hyphen or whitespace
    between words). The fragments are wrapped in non-capturing groups and
    OR-ed together, so one ``search`` call tests all keywords at once.
    """
    keyword_fragments = (
        r"\bdeep\s*-?\s*fakes?\b",
        r"\bdiffusions?\b",
        r"\bgpt(?:-\d+)?\b",
        r"\bllms?\b",
        r"\bgenerative\s*-?\s*models?\b",
        r"\bimage\s*-?\s*matching\b",
        r"\bgans?\b",
        # Spelled-out form of the GAN acronym.
        r"\bgenerative\s*-?\s*adversarial\s*-?\s*networks?\b",
    )

    # Wrap every fragment so the alternation cannot bleed across fragments.
    grouped = ["(?:" + fragment + ")" for fragment in keyword_fragments]
    alternation = "|".join(grouped)
    return re.compile(alternation, flags=re.IGNORECASE)


def main() -> int:
    """Count in-window rows whose text mentions any tracked keyword.

    Loads the CSV at ``CSV_PATH``, derives a ``start_dt`` column with
    ``_parse_start_dt``, keeps rows whose start date lies between 2021-05-01
    and 2023-06-26 (both ends inclusive), and searches the concatenated
    ``comp_name``, ``Tag`` and ``desc`` text of each remaining row with the
    pattern from ``_build_keyword_regex``.

    Returns the number of matching rows. Rows whose start date could not be
    parsed (``NaT``) never fall inside the window and are therefore excluded.
    """
    competitions = pd.read_csv(CSV_PATH)
    competitions = competitions.assign(start_dt=_parse_start_dt(competitions))

    window_start = pd.Timestamp("2021-05-01")
    window_end = pd.Timestamp("2023-06-26")
    in_window = competitions["start_dt"].between(
        window_start, window_end, inclusive="both"
    )
    selected = competitions[in_window].copy()

    # Build one searchable string per row; missing fields become empty text
    # so the concatenation never produces NaN.
    text_parts = [selected[column].fillna("") for column in ("comp_name", "Tag", "desc")]
    combined = text_parts[0]
    for part in text_parts[1:]:
        combined = combined + " " + part

    pattern = _build_keyword_regex()

    # Each row corresponds to a competition in this dataset, so counting
    # matching rows counts matching competitions.
    return sum(1 for text in combined if pattern.search(text))


if __name__ == "__main__":
    # Compute the count, persist it to RESULT_PATH, then echo it to stdout.
    count = main()
    with RESULT_PATH.open("w", encoding="utf-8") as result_file:
        result_file.write(f"{count}\n")
    print(count)
