"""Blocking PID file lock with timeout and jitter.

Implements a single-writer lock using a sidecar ``*.lock`` file next to a target path
(typically an HDF5 file). Acquisition blocks until either the lock is obtained or a
deadline passes, raising ``LockTimeoutError``.

Notes
-----
- Lock files store JSON metadata: ``pid``, ``host``, ``started_at``.
- A stale PID on the same host is cleaned up opportunistically before retrying.
- Filename convention: ``<target>.lock``.

"""

from __future__ import annotations

import contextlib
import json
import logging
import os
import random
import socket
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Self

logger = logging.getLogger(__name__)


class LockTimeoutError(TimeoutError):
    """Signal that waiting for a lock exceeded the allowed time.

    Derives from the built-in ``TimeoutError``, so handlers written as
    ``except TimeoutError`` catch it as well.
    """


def _lock_path_for(target: Path) -> Path:
    """Return the sidecar lock path that guards ``target``.

    The lock file sits in the same directory as ``target``; its name is the
    target's full file name (existing suffixes kept) with ``.lock`` appended,
    e.g. ``data.h5`` -> ``data.h5.lock``.
    """
    lock_name = f"{target.name}.lock"
    return target.with_name(lock_name)


@dataclass
class _LockInfo:
    """Owner metadata that is serialized as JSON into a lock file."""

    # Process ID of the lock holder.
    pid: int
    # Host name of the machine the lock holder runs on.
    host: str
    # Wall-clock time the lock was taken, in seconds since the epoch.
    started_at: float


class PIDLock:
    """Blocking process-level lock using a PID sidecar file.

    This lock is intended to protect single-writer regions for resources like HDF5 files.

    Args:
        target (str | Path): The target path to guard; the lock file will live next to it.
        timeout (float): Maximum seconds to wait for the lock. Defaults to 300.0.
        poll (float): Base poll interval in seconds. Defaults to 0.25.
        jitter (float): Uniform random jitter added/subtracted from each poll. Defaults to 0.05.

    Usage:
        - Context manager:
            with PIDLock("/path/to/file.h5"):
                # write operations
        - Explicit:
            lock = PIDLock("/path/to/file.h5")
            lock.acquire()
            # write operations
            lock.release()

    """

    def __init__(
        self,
        target: str | Path,
        *,
        timeout: float = 300.0,
        poll: float = 0.25,
        jitter: float = 0.05,
    ) -> None:
        """Initialize the lock.

        Args:
            target (str | Path): The target path to guard; the lock file will live next to it.
            timeout (float, optional): Maximum seconds to wait for the lock. Defaults to 300.0.
            poll (float, optional): Base poll interval in seconds. Defaults to 0.25.
            jitter (float, optional): Uniform random jitter added/subtracted from each poll.
                Defaults to 0.05.

        """
        self.target = Path(target)
        self.lock_path = _lock_path_for(self.target)
        self.timeout = float(timeout)
        self.poll = float(poll)
        self.jitter = float(jitter)
        self._held = False
        self._acquire_started_at: float | None = None

    def __enter__(self) -> Self:
        """Enter the context and acquire the lock."""
        self.acquire()
        return self

    def __exit__(self, exc_type, exc, tb) -> None:  # noqa: ANN001
        """Exit the context and release the lock."""
        self.release()

    def acquire(self) -> None:
        """Acquire the lock, blocking with a deadline.

        Raises:
            LockTimeoutError: If the lock is not obtained before the deadline.

        """
        deadline = time.monotonic() + max(0.0, self.timeout)
        self._acquire_started_at = time.monotonic()

        attempt = 0
        while True:
            attempt += 1
            try:
                self._try_write_lock_file()
                self._held = True
                waited = time.monotonic() - self._acquire_started_at
                logger.debug(
                    "PIDLock acquired after %.3fs (attempt %d): %s",
                    waited,
                    attempt,
                    self.lock_path,
                )
            except FileExistsError:
                # Check for staleness and retry after cleanup if stale
                if self._cleanup_if_stale():
                    continue

                # Reentrant acquire: if we already hold the lock in this PID, succeed
                if self._is_held_by_self():
                    self._held = True
                    return

                # Not stale -> backoff until next poll or timeout
                now = time.monotonic()
                if now >= deadline:
                    break
                sleep_for = max(0.0, self.poll + random.uniform(-self.jitter, self.jitter))  # noqa: S311
                # Clip sleep so we don't overshoot the deadline
                sleep_for = min(sleep_for, max(0.0, deadline - now - 1e-3))
                if sleep_for > 0:
                    time.sleep(sleep_for)
            else:
                return

        waited = time.monotonic() - self._acquire_started_at
        msg = f"Timed out after {waited:.3f}s waiting for lock {self.lock_path}"
        logger.warning(msg)
        raise LockTimeoutError(msg)

    def release(self) -> None:
        """Release the lock if held by this process."""
        if not self._held:
            return
        try:
            with self.lock_path.open("r") as f:
                data = json.load(f)
            if int(data.get("pid", -1)) == os.getpid():
                with contextlib.suppress(FileNotFoundError):
                    self.lock_path.unlink()
        finally:
            self._held = False

    def _try_write_lock_file(self) -> None:
        """Attempt to create the lock file once, raising on contention."""
        flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY
        fd: int | None = None
        try:
            fd = os.open(self.lock_path, flags, 0o644)
            info = _LockInfo(pid=os.getpid(), host=socket.gethostname(), started_at=time.time())
            os.write(fd, json.dumps(info.__dict__).encode("utf-8"))
        finally:
            if fd is not None:
                os.close(fd)

    def _cleanup_if_stale(self) -> bool:
        """Return True if a stale lock was found and cleaned up."""
        try:
            with self.lock_path.open("r") as f:
                data = json.load(f)
        except FileNotFoundError:
            return False
        except Exception:  # noqa: BLE001
            # Corrupted lock; remove
            with contextlib.suppress(FileNotFoundError):
                self.lock_path.unlink()
            return True

        pid = int(data.get("pid", -1))
        host = str(data.get("host", ""))
        if host == socket.gethostname() and pid > 0:
            try:
                os.kill(pid, 0)  # Does not send a signal, just checks if the process exists
            # Process no longer exists
            except ProcessLookupError:
                with contextlib.suppress(FileNotFoundError):
                    self.lock_path.unlink()
                return True
        return False

    def _is_held_by_self(self) -> bool:
        try:
            with self.lock_path.open("r") as f:
                data = json.load(f)
        except Exception:  # noqa: BLE001
            return False
        pid = int(data.get("pid", -1))
        return pid == os.getpid()
