# config.py
from __future__ import annotations

# --- Reproducibility path setup (auto-generated for structured bundle) ---
from pathlib import Path as _Path
# Repository root, resolved as two directory levels above this file: the
# file's own directory (expected to be code/) and then that directory's parent.
_REPO_ROOT = _Path(__file__).resolve().parent.parent  # code/ -> repo root
# -----------------------------------------------------------------------

import os
from dataclasses import dataclass
from pathlib import Path
from typing import List

from env_loader import load_dotenv

# Load .env before any setting below is read. ROOT is the directory that
# contains this file (one level below _REPO_ROOT), so the .env file is looked
# up next to config.py, not at the repository root.
# NOTE(review): load_dotenv comes from the project-local env_loader module; it
# presumably populates os.environ and is described as a silent no-op when the
# file is missing — confirm both in env_loader.
ROOT = Path(__file__).resolve().parent
load_dotenv(ROOT / ".env")

# -------------------------
# Paths
# -------------------------
# Everything this project writes lives under <repo root>/data.
DATA_DIR = _REPO_ROOT / "data"
RUNS_DIR = DATA_DIR / "runs"
AUDIT_DIR = DATA_DIR / "audit"

# Dataset files (.jsonl), all stored directly inside DATA_DIR.
TRAPS_JSONL = DATA_DIR / "eduframetrap_traps.jsonl"
DEV_JSONL = DATA_DIR / "eduframetrap_dev.jsonl"
TEST_JSONL = DATA_DIR / "eduframetrap_test.jsonl"

# Create the base directories at import time, parent first. Directories that
# already exist are left untouched.
for _base_dir in (DATA_DIR, RUNS_DIR, AUDIT_DIR):
    _base_dir.mkdir(parents=True, exist_ok=True)
del _base_dir  # keep the loop variable out of the module namespace

# -------------------------
# Helpers
# -------------------------
def _env_int(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset, empty, or
            whitespace-only.

    Returns:
        The parsed integer, or ``default`` when no value is configured.

    Raises:
        ValueError: If the variable holds text that ``int()`` cannot parse.
            The message names the offending variable and its value.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError as err:
        # The bare int() error does not say which setting was malformed;
        # name it so a bad .env entry can be located immediately.
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


def _env_float(name: str, default: float) -> float:
    """Read environment variable *name* as a float.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset, empty, or
            whitespace-only.

    Returns:
        The parsed float, or ``default`` when no value is configured.

    Raises:
        ValueError: If the variable holds text that ``float()`` cannot
            parse. The message names the offending variable and its value.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return float(raw)
    except ValueError as err:
        # The bare float() error does not say which setting was malformed;
        # name it so a bad .env entry can be located immediately.
        raise ValueError(
            f"Environment variable {name} must be a number, got {raw!r}"
        ) from err


def _env_str(name: str, default: str) -> str:
    """Return environment variable *name* with surrounding whitespace removed.

    Falls back to ``default`` when the variable is unset or contains only
    whitespace.
    """
    # An empty string is falsy, so `or` selects the default for blank values.
    return os.getenv(name, "").strip() or default


def _env_csv_ints(name: str, default: List[int]) -> List[int]:
    """Read environment variable *name* as a comma-separated list of integers.

    Whitespace around entries is ignored and empty entries (for example the
    gap in ``"1,,2"``) are skipped.

    Args:
        name: Environment variable to look up.
        default: Values used when the variable is unset, blank, or contains
            no entries at all.

    Returns:
        A new list of parsed integers, or a copy of ``default`` (never the
        caller's list object itself).

    Raises:
        ValueError: If any non-empty entry cannot be parsed by ``int()``.
            The message names the offending variable and its raw value.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return list(default)
    try:
        values = [int(tok) for tok in (part.strip() for part in raw.split(",")) if tok]
    except ValueError as err:
        # The bare int() error does not say which setting was malformed;
        # name it so a bad .env entry can be located immediately.
        raise ValueError(
            f"Environment variable {name} must be a comma-separated list of "
            f"integers, got {raw!r}"
        ) from err
    # A value made only of commas/whitespace yields no entries: use the default.
    return values or list(default)

# -------------------------
# Dataset / run knobs
# -------------------------
# Seed value, overridable through the RANDOM_SEED environment variable
# (presumably consumed by the dataset/eval scripts — confirm at the call sites).
RANDOM_SEED = _env_int("RANDOM_SEED", 1337)

# Use snake_case domain IDs to avoid spaces/punctuation issues in
# file names, grouping keys, plotting, and prompt templating.
DOMAINS = [
    "math",
    "physics",
    "economics",
    "chemistry",
    "biology",
    "computer_science",
]

# The per-domain count is the configurable knob; the total is derived from it
# so that adding or removing a domain cannot leave a stale hard-coded total.
# Default: 60 trap families per domain -> 360 total for the 6 domains above.
N_TRAP_FAMILIES_PER_DOMAIN = _env_int("N_TRAP_FAMILIES_PER_DOMAIN", 60)
# max(1, ...) keeps the multiplier at least 1 even if DOMAINS is emptied.
N_TRAP_FAMILIES_TOTAL = N_TRAP_FAMILIES_PER_DOMAIN * max(1, len(DOMAINS))

# Fraction used for the dev split — presumably the share of trap families
# written to DEV_JSONL, with the remainder going to TEST_JSONL; confirm in
# build_dataset.py. NOTE(review): the value is not range-checked here.
DEV_FRACTION = _env_float("DEV_FRACTION", 0.3)

# Confidence levels: 1, 2, 3 (low/medium/high). Override with a
# comma-separated list, e.g. CONFIDENCE_LEVELS=1,3.
CONFIDENCE_LEVELS = _env_csv_ints("CONFIDENCE_LEVELS", [1, 2, 3])

# Pacing/retry knobs — presumably the delay in seconds between consecutive
# API calls and the retry count per call; confirm in the calling code.
SLEEP_BETWEEN_CALLS_SEC = _env_float("SLEEP_BETWEEN_CALLS_SEC", 0.4)
MAX_RETRIES = _env_int("MAX_RETRIES", 3)

# Sampling temperatures, one per role (builder / validator / tutors / judges).
# Each can be overridden through the environment variable of the same name.
OPENAI_TEMP_BUILDER = _env_float("OPENAI_TEMP_BUILDER", 0.2)

# build_dataset.py imports this setting as VALIDATOR_TEMP, so the name must
# not change.
VALIDATOR_TEMP = _env_float("VALIDATOR_TEMP", 0.0)  # <-- REQUIRED name
# Backward-compatibility alias. It is a plain copy of VALIDATOR_TEMP and has no
# environment variable of its own: setting OPENAI_TEMP_VALIDATOR has no effect.
OPENAI_TEMP_VALIDATOR = VALIDATOR_TEMP

TUTOR_TEMP = _env_float("TUTOR_TEMP", 0.2)
JUDGE_TEMP = _env_float("JUDGE_TEMP", 0.0)

# Tutor mode, normalized to lowercase so comparisons are case-insensitive.
# The set of accepted modes is not defined in this file.
TUTOR_MODE = _env_str("TUTOR_MODE", "baseline").strip().lower()

# -------------------------
# Canonical label set (keep consistent across prompts / label_app / merge)
# -------------------------
# Labels that make up the canonical set.
ALLOWED_LABELS = ["PASS", "CS-SYC", "AUTH-SYC", "FACE-SYC", "DIR-SYC", "EVADE"]
# Status markers kept outside ALLOWED_LABELS — presumably used when the two
# judges disagree and when an item awaits human labeling; confirm in the
# merge / label_app code.
DISAGREE_LABEL = "DISAGREE"
PENDING_HUMAN_LABEL = "PENDING_HUMAN"

# -------------------------
# Models
# -------------------------
# API credentials. Each falls back to an empty string when its variable is
# unset or blank, so a missing key does not raise here.
OPENAI_API_KEY = _env_str("OPENAI_API_KEY", "")
ANTHROPIC_API_KEY = _env_str("ANTHROPIC_API_KEY", "")

# Main OpenAI model; every OpenAI-backed role defined in this file uses it.
# Override with the OPENAI_MODEL_MAIN environment variable.
OPENAI_MODEL_MAIN = _env_str("OPENAI_MODEL_MAIN", "gpt-5.2-2025-12-11")

# Advanced OpenAI tutor model: currently disabled. The setting is kept below,
# commented out, for reference; its default was the same model ID as above.

#OPENAI_MODEL_ADVANCED = _env_str("OPENAI_MODEL_ADVANCED", "gpt-5.2-2025-12-11")

# Main Anthropic model. Override with the ANTHROPIC_MODEL_MAIN environment variable.
ANTHROPIC_MODEL_MAIN = _env_str("ANTHROPIC_MODEL_MAIN", "claude-3-5-sonnet-20241022")

# -------------------------
# Model specs
# -------------------------
@dataclass(frozen=True)
class ModelSpec:
    """Describe one model role: which vendor, which model, and a token cap.

    Declared frozen, so fields cannot be reassigned after construction.
    """

    # Provider key selecting the API to call.
    vendor: str  # "openai" or "anthropic"
    # Model identifier string for that vendor.
    model: str
    # Token limit for this role — presumably the maximum output tokens
    # requested per call; confirm in the client code.
    max_tokens: int


# Dataset builder and validator. Both use the main OpenAI model; pointing them
# at a more expensive model raises the cost of building the dataset.
BUILDER = ModelSpec(vendor="openai", model=OPENAI_MODEL_MAIN, max_tokens=1200)
VALIDATOR = ModelSpec(vendor="openai", model=OPENAI_MODEL_MAIN, max_tokens=900)

# Tutors to evaluate: one entry per vendor.
EVALUATED_TUTORS: List[ModelSpec] = [
    # OpenAI tutor
    ModelSpec(vendor="openai", model=OPENAI_MODEL_MAIN, max_tokens=1200),

    # Anthropic tutor
    ModelSpec(vendor="anthropic", model=ANTHROPIC_MODEL_MAIN, max_tokens=1200),
]

# Judges (two-judge setup, one per vendor). Keep these stable so results stay
# comparable across runs.
# NOTE(review): each judge uses the same model ID as the evaluated tutor of the
# same vendor — confirm that judging a model with itself is intended.
JUDGE_A = ModelSpec(vendor="openai", model=OPENAI_MODEL_MAIN, max_tokens=900)
JUDGE_B = ModelSpec(vendor="anthropic", model=ANTHROPIC_MODEL_MAIN, max_tokens=900)

# Kept for backward compatibility only; the updated run_eval is not expected
# to use it.
TIE_BREAK = ModelSpec(vendor="openai", model=OPENAI_MODEL_MAIN, max_tokens=900)
