#!/usr/bin/env python3
"""Export Supabase Go review rows reviewed by players ranked 1 dan or higher.

Required environment variables:
  SUPABASE_URL
  SUPABASE_KEY

The key can be an anon key if RLS permits reads, otherwise use a service-role key.
"""

from __future__ import annotations

import argparse
import csv
import json
import os
import re
import sys
import urllib.parse
import urllib.request
from pathlib import Path
from typing import Any


# Columns requested from the table when --columns is not given on the command
# line.  "reviewer_rank" is the column the 1d+ filter in main() reads.
DEFAULT_COLUMNS = [
    "id",
    "game_file",
    "move_number",
    "comment",
    "reviewer",
    "reviewer_rank",
    "reviewer_strength",
    "board_state",
    "embedding",
]


# An explicit dan rank: a decimal number followed by "d" or "dan", with
# optional whitespace in between (matched against lower-cased, stripped text).
_DAN_RANK_RE = re.compile(r"([0-9]+)\s*d(?:an)?")


def is_rank_1d_or_higher(rank: Any, strength: Any = None) -> bool:
    """Return True when *rank* is an explicit dan rank of 1d or higher.

    Matching is case-insensitive and ignores surrounding whitespace, so
    ``"1d"``, ``" 9D "``, ``"3 dan"`` and ``"2dan"`` all qualify.  There is no
    upper bound on the dan number, so 8d and 9d are accepted as well.

    Blank/null values, placeholder strings such as ``"null"`` or ``"nan"``,
    kyu or professional ranks, and ``0d`` all return False.

    Args:
        rank: Raw ``reviewer_rank`` value; converted with ``str()`` before
            matching.
        strength: Accepted so existing callers keep working; it is not
            consulted when deciding the result.

    Returns:
        True if *rank* denotes 1 dan or above, otherwise False.
    """
    if rank is None:
        return False
    text = str(rank).strip().lower()
    match = _DAN_RANK_RE.fullmatch(text)
    # The explicit >= 1 check rejects "0d"; the pattern itself allows any number.
    return match is not None and int(match.group(1)) >= 1


def supabase_get(
    url: str,
    key: str,
    table: str,
    columns: list[str],
    page_size: int,
    offset: int,
) -> list[dict[str, Any]]:
    """Fetch one page of rows from a Supabase REST table.

    Sends a GET request to ``<url>/rest/v1/<table>`` that selects *columns*,
    orders by ``id`` ascending, and passes *page_size* / *offset* as the
    ``limit`` / ``offset`` query parameters.  *key* is sent both as the
    ``apikey`` header and as a bearer token.

    Returns:
        The JSON-decoded response body.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``json.loads`` raise; nothing
        is caught here.
    """
    query = urllib.parse.urlencode(
        [
            ("select", ",".join(columns)),
            ("order", "id.asc"),
            ("limit", str(page_size)),
            ("offset", str(offset)),
        ]
    )
    base = url.rstrip("/")
    resource = urllib.parse.quote(table)
    auth_headers = {
        "apikey": key,
        "Authorization": f"Bearer {key}",
        "Accept": "application/json",
    }
    request = urllib.request.Request(
        f"{base}/rest/v1/{resource}?{query}",
        headers=auth_headers,
    )
    with urllib.request.urlopen(request, timeout=60) as response:
        payload = response.read()
    return json.loads(payload.decode("utf-8"))


def write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write *rows* to *path* as UTF-8 JSON Lines, one object per line.

    The file is opened in write mode, so existing content is replaced.
    Non-ASCII characters are written verbatim rather than ``\\u``-escaped.
    """
    with path.open("w", encoding="utf-8") as out:
        out.writelines(
            f"{json.dumps(record, ensure_ascii=False)}\n" for record in rows
        )


def write_csv(path: Path, rows: list[dict[str, Any]], columns: list[str]) -> None:
    """Write *rows* to *path* as a UTF-8 CSV file with *columns* as the header.

    The header row is always written, even when *rows* is empty.  Keys in a
    row that are not listed in *columns* are dropped (``extrasaction="ignore"``).
    """
    with path.open("w", newline="", encoding="utf-8") as out:
        table = csv.DictWriter(out, fieldnames=columns, extrasaction="ignore")
        table.writeheader()
        for record in rows:
            table.writerow(record)


def main() -> int:
    """Export rows whose reviewer is ranked 1d or higher to JSONL, CSV and metadata.

    Command-line options:
        --table      Supabase table name (required).
        --out-dir    Output directory, created if missing (default "openairft").
        --page-size  Rows requested per page; must be >= 1 (default 1000).
        --columns    Columns to select (default DEFAULT_COLUMNS).

    The table is read page by page.  Each page is filtered immediately, so
    only the matching rows are kept in memory; rejected rows are only counted.

    Returns:
        0 on success, 2 when SUPABASE_URL or SUPABASE_KEY is not set.  Invalid
        command-line arguments exit through argparse (status 2).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--table", required=True, help="Supabase table name.")
    parser.add_argument("--out-dir", default="openairft")
    parser.add_argument("--page-size", type=int, default=1000)
    parser.add_argument("--columns", nargs="+", default=DEFAULT_COLUMNS)
    args = parser.parse_args()

    # A page size of 0 would end the loop immediately with nothing exported,
    # and a negative one is rejected by the server; fail early instead.
    if args.page_size < 1:
        parser.error("--page-size must be a positive integer")

    url = os.environ.get("SUPABASE_URL")
    key = os.environ.get("SUPABASE_KEY")
    if not url or not key:
        print("Missing SUPABASE_URL or SUPABASE_KEY.", file=sys.stderr)
        return 2

    filtered: list[dict[str, Any]] = []
    total_fetched = 0
    offset = 0
    while True:
        rows = supabase_get(url, key, args.table, args.columns, args.page_size, offset)
        if not rows:
            break
        if offset == 0 and "reviewer_rank" not in rows[0]:
            # Without this key the filter below rejects every row.
            print(
                "Warning: response rows have no 'reviewer_rank' key; "
                "no rows will match the 1d+ filter.",
                file=sys.stderr,
            )
        total_fetched += len(rows)
        filtered.extend(
            row
            for row in rows
            if is_rank_1d_or_higher(row.get("reviewer_rank"), row.get("reviewer_strength"))
        )
        print(f"Fetched {total_fetched} rows...", file=sys.stderr)
        # Advance by the number of rows actually received and stop only on an
        # empty page.  The server may cap rows per response below --page-size,
        # so a "short" page does not prove the table is exhausted.
        offset += len(rows)

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    jsonl_path = out_dir / "dan_reviews_1d_plus.jsonl"
    csv_path = out_dir / "dan_reviews_1d_plus.csv"
    meta_path = out_dir / "dan_reviews_1d_plus.meta.json"

    write_jsonl(jsonl_path, filtered)
    write_csv(csv_path, filtered, args.columns)
    meta_path.write_text(
        json.dumps(
            {
                "table": args.table,
                "total_rows_fetched": total_fetched,
                "rows_rank_1d_plus": len(filtered),
                "columns": args.columns,
            },
            indent=2,
        )
        + "\n",
        encoding="utf-8",
    )

    print(f"Saved {len(filtered)} rows to {jsonl_path}")
    print(f"Saved CSV to {csv_path}")
    print(f"Saved metadata to {meta_path}")
    return 0


# When run as a script, execute main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
