#!/usr/bin/env python3
"""
Clip YouTube-sourced video segments based on a JSON specification.

Features:
- Reads a JSON file containing a list of objects.
- For each object where meta_data.source == "YouTube":
  * Extracts the YouTube video ID from meta_data.url.
  * Looks for a source video named "<YouTubeID>.mp4" in the source directory.
  * Cuts the segment defined by meta_data.interval [start, end] (in seconds).
  * Saves the clipped video using the first element of input_path into the target directory.

Dependencies:
- Requires ffmpeg installed and available in PATH.

Usage:
    python clip_youtube_segments.py data.json /path/to/source_videos /path/to/target_clips
"""

import json
import re
import argparse
import subprocess
from pathlib import Path
from typing import Optional, List, Dict, Any

# Regular expressions tried in order; each captures the video ID in group 1.
# - The ``v=`` form is anchored to the start of the string or a query
#   separator (``?``, ``&``, ``;``) so that look-alike parameters such as
#   ``prev=`` or ``rev=`` are not mistaken for the video ID.
# - The path-based form covers /embed/, /shorts/, /live/ and /v/ URLs on both
#   youtube.com and youtube-nocookie.com.
YOUTUBE_ID_PATTERNS = [
    r"(?:^|[?&;])v=([A-Za-z0-9_-]{6,})",
    r"youtu\.be/([A-Za-z0-9_-]{6,})",
    r"youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/([A-Za-z0-9_-]{6,})",
]

def extract_youtube_id(url: str) -> Optional[str]:
    """Extract the YouTube video ID from a URL.

    Each entry of ``YOUTUBE_ID_PATTERNS`` is tried in order and the first
    match wins.

    Args:
        url: The URL (or URL fragment) to inspect.

    Returns:
        The captured video ID, or ``None`` when ``url`` is not a string or
        no pattern matches.
    """
    # Data loaded from JSON may carry None/numbers here; treat those as
    # "no ID" instead of letting re.search raise TypeError.
    if not isinstance(url, str):
        return None
    for pattern in YOUTUBE_ID_PATTERNS:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

def run_ffmpeg_clip(src: Path, dst: Path, start: float, end: float) -> bool:
    """Clip the segment ``start``..``end`` (seconds) of ``src`` into ``dst``.

    The streams are copied without re-encoding (``-c copy``) and the seek
    is applied on the input side (``-ss`` before ``-i``).
    NOTE(review): with stream copy the cut presumably snaps to keyframes, so
    the clip boundaries may not be frame-exact -- confirm this is acceptable.

    Args:
        src: Source video file; must exist.
        dst: Output file. Its parent directory is created if needed. An
            existing ``dst`` is never overwritten (ffmpeg runs with ``-n``).
        start: Segment start in seconds.
        end: Segment end in seconds; must be greater than ``start``.

    Returns:
        True if ffmpeg exited successfully, False otherwise (missing source,
        invalid interval, ffmpeg not runnable, or ffmpeg failure).
    """
    if not src.exists():
        print(f"[ERROR] Source video not found: {src}")
        return False
    # ``not end > start`` also rejects NaN bounds, which ``end <= start``
    # would silently let through.
    if not end > start:
        print(f"[WARN] Invalid interval (end <= start): {start} - {end} (skipped)")
        return False

    duration = end - start

    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel", "error",
        # Batch use: never read from stdin and never prompt about overwriting.
        "-nostdin",
        "-n",
        "-ss", f"{start:.3f}",
        "-i", str(src),
        "-t", f"{duration:.3f}",
        "-c", "copy",
        str(dst),
    ]

    # Remember whether the output was already there so that a failed run
    # never deletes a file this call did not create.
    pre_existing = dst.exists()
    try:
        dst.parent.mkdir(parents=True, exist_ok=True)
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        print(f"[ERROR] ffmpeg failed for {src.name}: {e}")
    except OSError as e:
        # Raised when the ffmpeg executable is missing or not runnable, or
        # when the output directory cannot be created.
        print(f"[ERROR] Could not run ffmpeg for {src.name}: {e}")
    else:
        print(f"[OK] Created clip: {dst.name}  ({start:.3f}s -> {end:.3f}s)")
        return True

    # A failed run may leave a truncated file behind; remove it so that a
    # later run does not mistake it for a finished clip.
    if not pre_existing:
        try:
            dst.unlink()
        except OSError:
            pass  # best effort: nothing was written, or it cannot be removed
    return False

def process_item(item: Dict[str, Any], src_dir: Path, tgt_dir: Path) -> Optional[Path]:
    """Clip the segment described by one JSON item if it is YouTube-sourced.

    The item is expected to look like::

        {"meta_data": {"source": "YouTube", "url": ..., "interval": [start, end]},
         "input_path": [<output file name>, ...]}

    Malformed items are reported with a warning and skipped rather than
    raising, so one bad entry does not abort a whole batch.

    Args:
        item: One decoded JSON object.
        src_dir: Directory holding the source videos named ``<YouTubeID>.mp4``.
        tgt_dir: Directory the clip is written into.

    Returns:
        The output path when the clip was created or already existed;
        ``None`` when the item is not YouTube-sourced, is malformed, or
        clipping failed.
    """
    if not isinstance(item, dict):
        print(f"[WARN] Skipping non-object JSON item: {item!r}")
        return None

    meta = item.get("meta_data")
    if not isinstance(meta, dict) or meta.get("source") != "YouTube":
        return None

    url = meta.get("url")
    interval = meta.get("interval") or []
    # Require a real two-element sequence: a scalar would break len(), and a
    # two-character string would otherwise be split into its characters.
    if not isinstance(interval, (list, tuple)) or len(interval) != 2:
        print(f"[WARN] Invalid interval format: {interval}")
        return None
    start, end = interval
    try:
        start = float(start)
        end = float(end)
    except (TypeError, ValueError):
        print(f"[WARN] Non-numeric interval: {interval}")
        return None

    # Only strings are handed to the extractor; any other type counts as
    # "no usable URL".
    yt_id = extract_youtube_id(url if isinstance(url, str) else "")
    if not yt_id:
        print(f"[WARN] Could not extract YouTube ID from URL: {url}")
        return None

    src_video = src_dir / f"{yt_id}.mp4"

    input_path = item.get("input_path") or []
    if isinstance(input_path, str):
        # Accept a bare string; indexing it would yield only its first character.
        input_path = [input_path]
    if not input_path:
        print(f"[WARN] No input_path specified for item with YouTube ID {yt_id}")
        return None
    if not isinstance(input_path, (list, tuple)) or not isinstance(input_path[0], str) or not input_path[0]:
        # An empty name would resolve to tgt_dir itself, which always exists.
        print(f"[WARN] Invalid input_path for item with YouTube ID {yt_id}: {input_path!r}")
        return None

    out_name = input_path[0]
    dst_video = tgt_dir / out_name

    if dst_video.exists():
        print(f"[INFO] Output already exists, skipping: {dst_video.name}")
        return dst_video

    success = run_ffmpeg_clip(src_video, dst_video, start, end)
    return dst_video if success else None

def load_json(path: Path) -> List[Dict[str, Any]]:
    """Read the JSON document at ``path`` and return its items as a list.

    A list root is returned as-is; a single object root is wrapped in a
    one-element list.

    Raises:
        ValueError: If the JSON root is neither a list nor an object.
    """
    with path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)

    if isinstance(payload, list):
        return payload
    if isinstance(payload, dict):
        # A lone object is treated as a one-item specification.
        return [payload]
    raise ValueError("JSON root must be a list or an object.")

def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point: clip every YouTube-sourced item of a JSON spec.

    Args:
        argv: Argument list to parse (without the program name). ``None``
            makes argparse fall back to ``sys.argv[1:]``.

    Returns:
        Process exit status: 1 when a fatal setup error occurred (missing
        JSON file or source directory, target directory cannot be created,
        JSON cannot be loaded), 0 otherwise. Failures of individual clips
        are reported on stdout but do not change the status.
    """
    parser = argparse.ArgumentParser(description="Clip YouTube video segments based on JSON spec.")
    parser.add_argument("json_path", help="Path to the input JSON file.")
    parser.add_argument("source_dir", help="Directory containing source YouTube MP4 files.")
    parser.add_argument("target_dir", help="Directory where clipped segments will be saved.")
    args = parser.parse_args(argv)

    json_path = Path(args.json_path)
    src_dir = Path(args.source_dir)
    tgt_dir = Path(args.target_dir)

    # is_file()/is_dir() also reject a directory passed as the JSON path and
    # a regular file passed as the source directory.
    if not json_path.is_file():
        print(f"[FATAL] JSON file not found: {json_path}")
        return 1
    if not src_dir.is_dir():
        print(f"[FATAL] Source directory not found: {src_dir}")
        return 1
    try:
        tgt_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"[FATAL] Cannot create target directory {tgt_dir}: {e}")
        return 1

    try:
        items = load_json(json_path)
    except (OSError, ValueError) as e:
        # ValueError covers malformed JSON, undecodable bytes and an
        # unsupported JSON root.
        print(f"[FATAL] Failed to load JSON: {e}")
        return 1

    created = 0
    processed = 0
    for item in items:
        meta = item.get("meta_data") if isinstance(item, dict) else None
        if not isinstance(meta, dict) or meta.get("source") != "YouTube":
            continue  # not a YouTube entry: nothing to attempt
        processed += 1
        # NOTE: process_item also returns a path when the output already
        # existed, so this count includes clips that were skipped as present.
        if process_item(item, src_dir, tgt_dir):
            created += 1

    print("\nSummary:")
    print(f"  Total items in JSON: {len(items)}")
    print(f"  Processed (YouTube or attempted): {processed}")
    print(f"  Clips created: {created}")
    print(f"  Target directory: {tgt_dir}")
    return 0

if __name__ == "__main__":
    # Propagate main()'s status so fatal errors yield a non-zero exit code.
    raise SystemExit(main())
