# -*- coding: utf-8 -*-
from typing import List, Tuple, Dict
import time, requests, concurrent.futures as cf
from config import WIKI_TIMEOUT_SEC
from io_utils import normalize_text, split_to_sentences

# Module-level HTTP session used by `_get_with_retry`; every request made
# through it carries the default headers set below.
_HTTP = requests.Session()
# Ask for JSON responses and send a fixed user-agent string with each call.
_HTTP.headers.update({"accept":"application/json","user-agent":"facts-first/mini"})

def _get_with_retry(url: str, params: dict = None, tries: int = 2, backoff: float = 0.6):
    """GET ``url`` through the shared session, retrying transient failures.

    Transport errors and the status codes 429/500/502/503/504 are retried,
    sleeping ``backoff * 2**attempt`` seconds between attempts. Any other
    error status (e.g. 404) is raised immediately, because repeating the
    request would not change the outcome.

    Args:
        url: Absolute URL to fetch.
        params: Optional query-string parameters.
        tries: Maximum number of attempts; values below 1 are treated as 1.
        backoff: Base delay in seconds before the first retry.

    Returns:
        The ``requests.Response`` of the first successful attempt.

    Raises:
        requests.RequestException: The transport error of the last attempt,
            or an ``HTTPError`` for a non-success status code.
    """
    retryable = (429, 500, 502, 503, 504)
    attempts = max(1, tries)
    for attempt in range(attempts):
        final = attempt == attempts - 1
        try:
            r = _HTTP.get(url, params=params or {}, timeout=WIKI_TIMEOUT_SEC)
        except requests.RequestException:
            if final:
                raise
        else:
            if r.status_code not in retryable:
                # Raises at once for non-transient 4xx/5xx; no retry, no sleep.
                r.raise_for_status()
                return r
            if final:
                raise requests.HTTPError(f"{r.status_code}", response=r)
        # Only reached when another attempt follows, so the last failure is
        # reported without a pointless delay.
        time.sleep(backoff * (2 ** attempt))
    # The final iteration always returns or raises; this guards future edits.
    raise AssertionError("retry loop exited without returning or raising")

def _keywords(text: str, k: int = 6) -> str:
    import re
    toks = re.findall(r"[A-Za-z][A-Za-z0-9\-]+", (text or ""))
    cnt: Dict[str, int] = {}
    for t in toks:
        t = t.lower()
        if len(t) < 3: continue
        cnt[t] = cnt.get(t, 0) + 1
    top = sorted(cnt.items(), key=lambda x: (-x[1], x[0]))[:max(2, k)]
    return " ".join([w for w, _ in top]) if top else "general knowledge"

def search_titles(q: str, lang: str, limit: int) -> List[str]:
    """Return page titles matching ``q`` on the ``lang`` Wikipedia.

    Issues a ``list=search`` query against the MediaWiki action API. This is
    best effort: any failure while fetching or decoding the response yields
    an empty list instead of an exception.

    Args:
        q: Search query string.
        lang: Wikipedia language subdomain (e.g. ``"en"``).
        limit: Maximum number of hits to request.

    Returns:
        The stripped titles of the hits that carry a non-empty title.
    """
    endpoint = f"https://{lang}.wikipedia.org/w/api.php"
    query = {
        "action": "query",
        "list": "search",
        "format": "json",
        "srlimit": str(limit),
        "srsearch": q,
    }
    try:
        payload = _get_with_retry(endpoint, params=query).json()
        results = payload.get("query", {}).get("search", []) or []
        titles: List[str] = []
        for hit in results:
            if hit.get("title"):
                titles.append(hit.get("title", "").strip())
        return titles
    except Exception:
        return []

def page_summary(title: str, lang: str) -> str:
    """Return the plain-text introduction of a Wikipedia page.

    Tries the REST ``page/summary`` endpoint first and falls back to the
    action API (``prop=extracts`` with ``exintro`` and ``explaintext``) when
    the first lookup fails or returns no extract. Both lookups are best
    effort: errors are swallowed so a single bad page never aborts a batch.

    Args:
        title: Page title as returned by the search API.
        lang: Wikipedia language subdomain (e.g. ``"en"``).

    Returns:
        The stripped extract text, or ``""`` when nothing could be fetched.
    """
    if not title:
        # Nothing to look up; skip two requests that cannot succeed.
        return ""

    # safe="" percent-encodes "/" as well, so a title such as "AC/DC" stays a
    # single path segment instead of being split into "AC" and "DC".
    slug = requests.utils.quote(title, safe="")
    rest_url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{slug}"
    try:
        data = _get_with_retry(rest_url).json()
        text = (data.get("extract") or "").strip()
        if text:
            return text
    except Exception:
        # Deliberate best effort: fall through to the action-API lookup.
        pass

    api_url = f"https://{lang}.wikipedia.org/w/api.php"
    params = {"action":"query","prop":"extracts","exintro":"1","explaintext":"1","format":"json","titles": title}
    try:
        data = _get_with_retry(api_url, params=params).json()
        pages = (data.get("query") or {}).get("pages") or {}
        for pg in pages.values():
            ex = (pg.get("extract") or "").strip()
            if ex:
                return ex
    except Exception:
        # Deliberate best effort: an unreachable page yields an empty summary.
        pass
    return ""

def fetch_sentences_for_anchor(anchor_text: str, lang: str, k_pages: int, sent_max: int,
                               workers: int = 1) -> List[str]:
    """Collect de-duplicated summary sentences related to ``anchor_text``.

    Keywords from the anchor text are used to search Wikipedia; the summary
    of each hit is split into sentences, and the first ``max(1, sent_max)``
    sentences of every page are kept, in search-result order. Two sentences
    count as duplicates when ``normalize_text`` gives the same value.

    Args:
        anchor_text: Text the keywords are extracted from.
        lang: Wikipedia language subdomain (e.g. ``"en"``).
        k_pages: Maximum number of search hits to fetch.
        sent_max: Maximum sentences kept per page (at least one is kept).
        workers: Thread count for fetching summaries; values of 1 or less
            (or ``None``) fetch sequentially.

    Returns:
        The original (un-normalized) sentences, first occurrence only.
    """
    titles = search_titles(_keywords(anchor_text, 6), lang, k_pages)
    if not titles:
        return []

    def _fetch(title: str) -> str:
        # One failing page must not abort the whole batch, in either mode.
        try:
            return page_summary(title, lang)
        except Exception:
            return ""

    if workers and workers > 1:
        # No point in spawning more threads than there are pages to fetch.
        with cf.ThreadPoolExecutor(max_workers=min(workers, len(titles))) as pool:
            # map() yields results in input order, so output order is stable.
            summaries = list(pool.map(_fetch, titles))
    else:
        summaries = [_fetch(t) for t in titles]

    per_page = max(1, sent_max)
    out: List[str] = []
    seen = set()
    for summ in summaries:
        if not summ:
            continue
        for s in split_to_sentences(summ)[:per_page]:
            n = normalize_text(s)
            if n and n not in seen:
                seen.add(n)
                out.append(s)
    return out
