# -*- coding: utf-8 -*-
import os, json, re
from typing import Any, List, Tuple

def normalize_text(s: str) -> str:
    """Return *s* lowercased with whitespace trimmed and collapsed.

    Leading/trailing whitespace is removed and every internal run of
    whitespace becomes a single space. A falsy value (``None`` or ``""``)
    yields the empty string.
    """
    raw = s if s else ""
    tokens = raw.strip().split()
    collapsed = " ".join(tokens)
    return collapsed.lower()

def split_to_sentences(text: str) -> List[str]:
    """Split *text* into sentences at whitespace following a terminator.

    A split happens only where whitespace comes right after one of
    ``. ! ?`` or their full-width forms ``。 ！ ？``; the terminator stays
    attached to its sentence. Each piece is stripped and empty pieces are
    dropped. A falsy *text* yields an empty list.
    """
    source = (text or "").strip()
    sentences = []
    for chunk in re.split(r"(?<=[.!?。！？])\s+", source):
        trimmed = chunk.strip()
        if trimmed:
            sentences.append(trimmed)
    return sentences

def _jsonl_path(base_path: str) -> str:
    """Return *base_path* with a ``.jsonl`` suffix, adding one if absent.

    The existing suffix is matched case-insensitively, so a path already
    ending in e.g. ``.JSONL`` is returned unchanged.
    """
    already_suffixed = base_path.lower().endswith(".jsonl")
    if already_suffixed:
        return base_path
    return base_path + ".jsonl"

def init_jsonl(path: str, truncate: bool) -> None:
    """Prepare *path* so that subsequent appends start from a known state.

    Args:
        path: Location of the JSONL file.
        truncate: When true and the file already exists, delete it so later
            appends start from an empty file.

    When nothing is deleted (``truncate`` is false, or the file does not
    exist yet) the parent directory is created if missing. The file itself
    is never created here.
    """
    if truncate and os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            # Best effort: a file that cannot be removed (permissions, it is
            # a directory, it vanished concurrently) must not abort the
            # caller. Only filesystem errors are suppressed.
            pass
        return
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)

def append_jsonl(path: str, obj: Any) -> None:
    """Append *obj* to *path* as one JSON line and push it to disk.

    Args:
        path: Location of the JSONL file; the parent directory is created
            if it does not exist.
        obj: Any value ``json.dumps`` can serialize. Non-ASCII characters
            are written as-is (UTF-8), not escaped.

    Raises:
        TypeError: If *obj* is not JSON-serializable. Raised before the
            filesystem is touched, so no empty file or directory is left
            behind.
        OSError: If the directory or file cannot be created or written.
    """
    # Serialize up front so a bad object fails before any file is created.
    line = json.dumps(obj, ensure_ascii=False) + "\n"
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
    with open(path, "a", encoding="utf-8") as f:
        f.write(line)
        f.flush()
        try:
            os.fsync(f.fileno())
        except OSError:
            # fsync is a durability hint only; some targets (e.g. special
            # files) reject it, and the data has already been flushed.
            pass

def select_by_sim_threshold(anchor: str, pool: List[str], sims: List[float],
                            sim_threshold: float, max_snippets: int = 0,
                            fallback_top_m: int = 3) -> List[str]:
    """Pick snippets from *pool* whose similarity meets a threshold.

    Args:
        anchor: Not used by the selection logic; kept for the call signature.
        pool: Candidate snippets.
        sims: Similarity score for each snippet, paired positionally with
            *pool* (extra items on either side are ignored by ``zip``).
        sim_threshold: Minimum score (inclusive) for a snippet to qualify.
        max_snippets: If non-zero, cap on the number of snippets returned.
        fallback_top_m: If no snippet qualifies and this is positive, the
            top-scoring ``fallback_top_m`` snippets are used instead.

    Returns:
        The selected snippets ordered by descending score; ties keep their
        original relative order. Empty when nothing is selected.
    """
    def by_score_desc(item):
        return -item[1]

    scored = list(zip(pool, sims))
    chosen = [item for item in scored if item[1] >= sim_threshold]

    # Nothing cleared the threshold: fall back to the best few, if allowed.
    if not chosen and fallback_top_m > 0:
        ranked = sorted(scored, key=by_score_desc)
        chosen = ranked[:fallback_top_m]

    if not chosen:
        return []

    chosen.sort(key=by_score_desc)
    if max_snippets and max_snippets < len(chosen):
        chosen = chosen[:max_snippets]
    return [snippet for snippet, _score in chosen]
