#!/usr/bin/env python3
"""
process_original_train.py

Runs the following scripts in order (from the project root):
  1) clickhouse/process_embeddings.py
  2) clickhouse/utils/check_collections.py
  3) clickhouse/utils/test_choose.py

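Tries to import config/config.py in this process (optional; a warning is
printed if the import fails) and puts the project root on PYTHONPATH so that
all child processes can import top-level packages such as config.config.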
"""

from __future__ import annotations

import os
import sys
import subprocess
from pathlib import Path
from typing import Optional


def find_project_root(start: Optional[Path] = None) -> Path:
    """
    Locate the repo root by walking upward from `start` (or from this file).

    A directory qualifies as the root when it contains a 'clickhouse' or a
    'config' sub-directory.

    Args:
        start: File or directory to begin the search from. An existing
            directory is itself the first candidate; for any other path the
            search begins at its parent. Defaults to this file.

    Returns:
        The first qualifying directory. If none qualifies, the directory one
        level above the one containing this file (e.g. repo/ for
        repo/orchestrator/this_file.py).
    """
    origin = start if start is not None else Path(__file__).resolve()
    # A directory must be inspected itself; taking `.parent` unconditionally
    # would skip it and miss a root that was passed in directly.
    here = origin if origin.is_dir() else origin.parent
    for candidate in (here, *here.parents):
        if (candidate / "clickhouse").is_dir() or (candidate / "config").is_dir():
            return candidate
    # Fallback: the parent of this file's directory. Guard the index so a file
    # located directly under the filesystem root does not raise IndexError.
    ancestors = Path(__file__).resolve().parents
    return ancestors[1] if len(ancestors) > 1 else ancestors[0]


# Resolve the project root once at import time and put it at the front of
# sys.path (only if it is not already listed) so top-level packages can be
# imported by *this* process.
project_root = find_project_root()
_project_root_entry = str(project_root)
if _project_root_entry not in sys.path:
    sys.path.insert(0, _project_root_entry)


def run_script(script_path: Path) -> None:
    """
    Run one Python script as a child process and abort on failure.

    The child is started with the current interpreter (`sys.executable`), with
    the project root as its working directory and prepended to PYTHONPATH.

    Args:
        script_path: Path of the script to execute.

    Exits the whole process via `sys.exit`:
        - with code 1 if `script_path` does not exist;
        - with the child's return code if the child exits non-zero.
    """
    if not script_path.exists():
        print(f"[ERROR] Script not found: {script_path}", file=sys.stderr)
        sys.exit(1)

    # Show the path relative to the project root when possible. relative_to()
    # raises ValueError for paths that are not under the root (including
    # relative paths); that only affects the log line, so fall back to the
    # path as given instead of crashing.
    try:
        rel = script_path.relative_to(project_root)
    except ValueError:
        rel = script_path
    # Flush so this line is emitted before the child's own output even when
    # stdout is block-buffered (piped or redirected to a file).
    print(f"[INFO] Running {rel} ...", flush=True)

    # Ensure child processes can import top-level packages (e.g., config.config)
    env = dict(os.environ)
    env["PYTHONPATH"] = (
        f"{str(project_root)}{os.pathsep}{env.get('PYTHONPATH', '')}".rstrip(os.pathsep)
    )

    # Run from the project root so relative paths inside scripts behave as expected.
    # check=False: the return code is inspected below so the failure can be logged.
    result = subprocess.run(
        [sys.executable, str(script_path)],
        cwd=str(project_root),
        env=env,
        check=False,
    )

    if result.returncode != 0:
        print(f"[ERROR] {script_path.name} failed with code {result.returncode}", file=sys.stderr)
        sys.exit(result.returncode)

    # Flush so the line is not delayed past the next child's output.
    print(f"[INFO] Finished {script_path.name}", flush=True)


# Try to import config (optional) in *this* process so we log whether it's present.
# Only the import itself is guarded: the success message lives in `else` so that a
# failure while printing is not misreported as a config import failure.
# The broad `except Exception` is deliberate: importing config runs its
# module-level code, and any failure there should only produce a warning.
try:
    import config.config  # noqa: F401
except Exception as e:
    print(f"[WARN] Could not import config/config.py ({e}). Continuing without it.")
else:
    print("[INFO] Loaded config/config.py")


# Scripts to execute, in order; each path is anchored at the project root.
SCRIPTS = [
    project_root.joinpath("clickhouse", "process_embeddings.py"),
    project_root.joinpath("clickhouse", "utils", "check_collections.py"),
    project_root.joinpath("clickhouse", "utils", "test_choose.py"),
]


def main() -> None:
    """
    Execute every entry of SCRIPTS in list order.

    run_script() terminates the process on the first failure, so the final
    message is printed only when all scripts returned exit code 0.
    """
    for step in SCRIPTS:
        run_script(step)
    print("[INFO] All scripts completed successfully.")


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()