""""""

from __future__ import annotations

import hashlib
import zipfile
from pathlib import Path
from typing import Dict
from urllib.request import urlretrieve

from . import base

# Direct-download URL of the AV-MNIST zip archive (Zenodo record 4725906).
ZENODO_URL = "https://zenodo.org/record/4725906/files/avmnist.zip?download=1"
# Expected MD5 hex digest of that archive; the local zip is compared against
# this value before it is extracted.
ZENODO_MD5 = "1a9dc27d86c55c14bb54e0f979f74141"


def _md5(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in 8192-byte pieces so that it never has to be held in
    memory as a whole.
    """
    digest = hashlib.md5()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(8192)
            if not piece:
                # An empty read signals end of file.
                break
            digest.update(piece)
    return digest.hexdigest()


def _verify_archive(path: Path) -> None:
    """Raise ``RuntimeError`` unless the file at *path* hashes to ``ZENODO_MD5``."""
    md5 = _md5(path)
    if md5 != ZENODO_MD5:
        raise RuntimeError(f"MD5 mismatch for AV-MNIST zip (got {md5}, expected {ZENODO_MD5})")


def download_and_prepare(root: str | Path | None = None, download: bool = True) -> Dict:
    """Fetch (if needed), verify and unpack the AV-MNIST archive.

    Steps, in order:

    1. Resolve the data directory and look for ``avmnist.zip`` inside it.
    2. If the zip is missing and ``download`` is true, download it from
       ``ZENODO_URL``.
    3. Check the zip against ``ZENODO_MD5``.
    4. Extract the zip into ``<data_root>/raw`` unless that directory already
       exists.
    5. Locate ``train`` and ``test`` entries anywhere below ``raw`` and hand
       them to ``base.build_metadata``.

    Args:
        root: Directory to use for the dataset. When falsy,
            ``base.default_data_root() / "avmnist"`` is used instead. Note
            that an explicitly given ``root`` is used as-is (no ``avmnist``
            sub-directory is appended). The value is passed through
            ``base.ensure_dir`` (presumably creates the directory and returns
            a ``Path`` -- confirm against ``base``).
        download: Whether a missing archive may be downloaded.

    Returns:
        Whatever ``base.build_metadata("AVMNIST", data_root, splits)``
        returns, where ``splits`` maps ``"train"`` and ``"test"`` to the
        located paths.

    Raises:
        FileNotFoundError: The archive is absent and ``download`` is false.
        RuntimeError: The archive's MD5 digest does not match ``ZENODO_MD5``,
            or no ``train``/``test`` entry is found after extraction.
    """
    data_root = base.ensure_dir(root or base.default_data_root() / "avmnist")
    zip_path = data_root / "avmnist.zip"

    if zip_path.exists():
        _verify_archive(zip_path)
    elif download:
        # Download under a temporary name and only publish the file as
        # ``avmnist.zip`` once it is complete and verified. Otherwise an
        # interrupted transfer would leave a truncated zip that every later
        # call finds, skips re-downloading, and rejects with an MD5 mismatch.
        part_path = zip_path.with_name(zip_path.name + ".part")
        try:
            urlretrieve(ZENODO_URL, part_path)
            _verify_archive(part_path)
            part_path.replace(zip_path)
        finally:
            # After a successful rename the temporary name is gone; on any
            # failure remove the leftover so the next call starts clean.
            if part_path.exists():
                part_path.unlink()
    else:
        raise FileNotFoundError(f"{zip_path} not found. Set download=True to fetch.")

    extract_dir = data_root / "raw"
    if not extract_dir.exists():
        # The existence of ``raw`` is the "already extracted" marker, so it
        # must only appear once extraction has finished. Unpack into a staging
        # directory and rename it at the end. A stale staging directory from
        # an interrupted run is harmless: the archive is checksum-verified, so
        # re-extracting simply overwrites the same set of files.
        staging_dir = data_root / "raw.partial"
        staging_dir.mkdir(exist_ok=True)
        with zipfile.ZipFile(zip_path, "r") as zf:
            zf.extractall(staging_dir)
        staging_dir.replace(extract_dir)

    train_dir = next(data_root.glob("raw/**/train"), None)
    test_dir = next(data_root.glob("raw/**/test"), None)
    if train_dir is None or test_dir is None:
        raise RuntimeError("Could not locate train/test folders after extraction.")

    splits = {"train": train_dir, "test": test_dir}
    return base.build_metadata("AVMNIST", data_root, splits)


# Names exported by ``from <module> import *``: the entry point and the
# archive URL. ``ZENODO_MD5`` and ``_md5`` are not part of this list.
__all__ = ["download_and_prepare", "ZENODO_URL"]
