# Original file: https://github.com/jelford/webwatcher/blob/master/src/webwatcher/storage.py

from pathlib import Path
from typing import Collection, Dict
from typing_extensions import Protocol

import base64
from datetime import datetime, date
import json
import os
import shutil
from urllib.parse import urlparse, unquote


class Persistable(Protocol):
    """Structural interface for objects that ``Storage.persist`` can save."""

    # typet5: () -> Dict[str, Path]
    # type4py: () -> None
    def artefacts(self) -> Dict[str, str]:
        """Return a mapping of artefact name to the location of its file.

        ``Storage.persist`` copies every location into the artefact
        directory with ``shutil.copy``.
        """
        ...

    # typet5: () -> Dict[str, str]
    # type4py: () -> None
    def get_meta_info(self) -> Dict[str, object]:
        """Return the metadata to record for this object.

        ``Storage.persist`` converts the result with ``_json_safe`` and
        appends it to the record file as one JSON line.
        """
        ...


class StorageFailureException(Exception):
    """Raised when something could not be written to storage.

    Attributes:
        msg: Human-readable description of what failed.
    """

    def __init__(self, msg):
        # Hand the message to Exception as well: without this, an instance
        # created with the ``msg=`` keyword has empty ``args`` and an empty
        # ``str()``, so tracebacks and logs show no explanation at all.
        super().__init__(msg)
        self.msg = msg


class Storage:
    """Append-only record store kept in a directory on disk.

    Every persisted object adds one JSON line to ``record.dat`` inside the
    storage directory. Its artefact files are copied into the ``artefacts``
    subdirectory under names derived by ``_storage_filename_for``.
    """

    def __init__(self, storage_root=None):
        """Work out where records and artefacts live.

        Args:
            storage_root: Directory to store into, as a ``Path`` or anything
                ``Path()`` accepts (for example a string). When ``None``,
                ``data_folder("storage")`` is used.
        """
        if storage_root is None:
            self._storage_dir = data_folder("storage")
        else:
            # Coerce so that a plain string root works with the "/" joins below.
            self._storage_dir = Path(storage_root)

        self._meta_info_path = self._storage_dir / "record.dat"
        self._artefact_storage_dir = self._storage_dir / "artefacts"

    def persist(self, persistable: Persistable):
        """Copy the artefacts of ``persistable`` and append its metadata.

        Args:
            persistable: Object providing ``artefacts()`` and
                ``get_meta_info()``.

        Raises:
            StorageFailureException: If any artefact cannot be stored. The
                underlying error is attached as ``__cause__``.
        """
        try:
            os.makedirs(self._artefact_storage_dir)
        except FileExistsError:
            pass

        persisted_locations = {}
        for name, location in persistable.artefacts().items():
            try:
                storage_filename = _storage_filename_for(location)
                storage_location = self._artefact_storage_dir / storage_filename

                shutil.copy(location, storage_location)
                persisted_locations[name] = storage_location.as_uri()
            except Exception as err:
                # Only ordinary errors are translated; KeyboardInterrupt and
                # SystemExit must keep propagating untouched.
                raise StorageFailureException(
                    msg="While persisting {}".format(name)
                ) from err

        meta_info = _json_safe(persistable.get_meta_info())
        if persisted_locations:
            # Remember where each artefact ended up, keyed by artefact name.
            meta_info["_storage"] = persisted_locations

        with open(self._meta_info_path, mode="a", encoding="utf-8") as f:
            f.write(json.dumps(meta_info))
            f.write("\n")

    def find(self, **kwargs):
        """Start a query for records whose fields equal the given keywords.

        Returns:
            A ``StorageQuery`` bound to this storage; nothing is read until
            its ``fetch()`` is called.
        """
        return StorageQuery(self, kwargs)


class FromPersistence:
    """Wrapper around one record that was loaded back from storage."""

    def __init__(self, data):
        self.data = data

    def __getitem__(self, key):
        """Return the record's value for ``key``, or ``None`` if it has none."""
        value = self.data.get(key)
        return value

    def fetch_local(self, key):
        """Return the local filesystem path of the artefact named ``key``.

        The path is the percent-decoded path component of the URL stored
        under the record's ``_storage`` entry. ``None`` is returned when the
        record has no ``_storage`` entry or no artefact of that name.
        """
        record = self.data
        if "_storage" not in record or key not in record["_storage"]:
            return None

        parsed_url = urlparse(record["_storage"][key])
        return unquote(parsed_url.path)


class StorageQuery:
    """Query over the records of a storage, refined through chained calls.

    The record file is only read when ``fetch()`` is called.
    """

    def __init__(self, backing_storage, filter_args):
        self.storage = backing_storage
        self.filter_args = filter_args
        self.required_fields = []
        self.order_fields = []
        self.desc = False

    def having(self, *args):
        """Keep only records that contain every named field; returns ``self``."""
        self.required_fields = args
        return self

    def order_by(self, *order_fields, desc=False):
        """Sort results by the given fields, descending if ``desc``; returns ``self``."""
        self.desc = desc
        self.order_fields = order_fields
        return self

    def fetch(self):
        """Run the query and return the matches as ``FromPersistence`` objects.

        Returns an empty list when the record file does not exist. A matching
        record that lacks one of the ordering fields raises ``KeyError``.
        """
        try:
            with open(self.storage._meta_info_path, mode="r", encoding="utf-8") as f:
                # One JSON document per line.
                records = [_de_jsonsafe(json.loads(line)) for line in f]
        except FileNotFoundError:
            return []

        def _wanted(record):
            if not _filter_match(self.filter_args, record):
                return False
            return all(field in record for field in self.required_fields)

        def _sort_key(record):
            return [record[field] for field in self.order_fields]

        matches = [record for record in records if _wanted(record)]
        matches.sort(key=_sort_key, reverse=self.desc)

        return [FromPersistence(record) for record in matches]


_json_dateformat = "%Y-%m-%d %H:%M:%S.%f%z"


# typet5: (jsonsafe_data: dict) -> dict
# type4py: (jsonsafe_data: Dict[str, Any]) -> bool
def _de_jsonsafe(jsonsafe_data) -> Dict[str, object]:
    result = dict()
    for k, v in jsonsafe_data.items():
        if isinstance(v, dict):
            if "__date" in v:
                v = datetime.strptime(v["__date"], _json_dateformat)
        result[k] = v
    return result


def _filter_match(filters, data):
    for filter_key, filter_value in filters.items():
        try:
            if data[filter_key] != filter_value:
                return False
        except KeyError:
            return False
    else:
        return True


def _json_safe(v):
    if type(v) in (str, int, float, bool):
        return v
    if isinstance(v, dict):
        return {k: _json_safe(nested_val) for k, nested_val in v.items()}

    if isinstance(v, datetime):
        return {"__date": v.strftime(_json_dateformat)}

    raise RuntimeError("Don't know how to jsonize: {v}".format(v=type(v)))


def _storage_filename_for(existing_file):
    if os.path.getsize(existing_file) == 0:
        return "_empty_file"

    hasher = file_hash(existing_file)

    return base64.b64encode(hasher.digest(), altchars=b"_-").decode("utf-8")


from hashlib import sha256


def _read_file_chunks(fileobject):
    while True:
        chunk = fileobject.read(8192)
        if not chunk:
            break
        yield chunk


class ComparisonFailureException(Exception):
    """Raised when a file cannot be opened for hashing."""

    # NOTE(review): no definition or import of this name is visible elsewhere
    # in this file, so it is defined here to make the error path in
    # file_hash work. If the project declares it in another module, import
    # that one instead.


def file_hash(path):
    """Return a SHA-256 hash object fed with the contents of ``path``.

    The file is read in chunks via ``_read_file_chunks``.

    Args:
        path: Location of the file to hash.

    Returns:
        The ``sha256`` hasher after consuming the whole file; call
        ``.digest()`` or ``.hexdigest()`` on it.

    Raises:
        ComparisonFailureException: If ``path`` does not exist. The original
            ``FileNotFoundError`` is attached as ``__cause__``.
    """
    hasher = sha256()
    try:
        with open(path, "rb") as f:
            for chunk in _read_file_chunks(f):
                hasher.update(chunk)
    except FileNotFoundError as err:
        raise ComparisonFailureException(
            "Unable to open {path} for hashing".format(path=path)
        ) from err

    return hasher


""" 
environment.py - contains methods for interacting with local
environment, e.g. local cache dirs, state directories, ...
"""
import functools
import os
from pathlib import Path
import tempfile
from typing import Callable

import appdirs


# typet5: (name: str) -> Path
# type4py: (name: str) -> dict
def cache_folder(name) -> Path:
    """Return the directory ``name`` under the cache root, creating it if needed."""
    folder = _cache_root().joinpath(name)
    _ensure_exists(folder)
    return folder


# typet5: (name: str) -> Path
# type4py: (name: str) -> Dict[str, bool]
def data_folder(name) -> Path:
    """Return the directory ``name`` under the data root, creating it if needed."""
    folder = _data_root().joinpath(name)
    _ensure_exists(folder)
    return folder


# typet5: (folder: Path) -> None
# type4py: (folder: pathlib.Path) -> Union[tuple, list]
def _ensure_exists(folder: Path) -> None:
    try:
        os.makedirs(folder)
    except FileExistsError:
        if folder.is_dir():
            return
        raise


# typet5: (env_var_name: str, default_app_dir: Path) -> Path
# type4py: (env_var_name: str, default_app_dir: str) -> Optional[datetime.datetime]
@functools.lru_cache()
def _appdir_with_override(env_var_name: str, default_app_dir: str) -> Path:
    """Pick, create and return an application directory.

    The environment variable ``WEBWATCHER_<env_var_name>`` takes precedence
    over ``default_app_dir`` when it is set to a non-empty value. If the
    chosen directory cannot be created, a ``webwatcher`` directory under the
    system temporary directory is used instead.

    Results are memoised per argument pair by ``lru_cache``, so the
    environment is only consulted on the first call for each pair.

    Args:
        env_var_name: Suffix of the overriding environment variable.
        default_app_dir: Directory to use when no override is set.

    Returns:
        Path of the directory that was selected.

    Raises:
        OSError: If the temporary fallback directory cannot be created either.
    """
    user_supplied_path = os.getenv(
        "WEBWATCHER_{env_var_name}".format(env_var_name=env_var_name)
    )

    root = Path(user_supplied_path) if user_supplied_path else Path(default_app_dir)

    try:
        _ensure_exists(root)
    except OSError:
        # Best effort: fall back to a temp location on filesystem errors
        # only, so KeyboardInterrupt and SystemExit are never swallowed.
        root = Path(tempfile.gettempdir()) / "webwatcher"
        _ensure_exists(root)

    return root


# typet5: () -> Path
# type4py: () -> MutableMapping[str, Any]
def _data_root() -> Path:
    """Return the root directory for application data.

    The default is the appdirs user data directory for "webwatcher"; the
    lookup goes through ``_appdir_with_override`` with the "DATA_ROOT" key.
    """
    return _appdir_with_override("DATA_ROOT", appdirs.user_data_dir("webwatcher"))


# typet5: () -> Path
# type4py: () -> bool
def _cache_root() -> Path:
    """Return the root directory for cached files.

    The default is the appdirs user cache directory for "webwatcher"; the
    lookup goes through ``_appdir_with_override`` with the "CACHE_ROOT" key.
    """
    default_dir = appdirs.user_cache_dir("webwatcher")
    return _appdir_with_override("CACHE_ROOT", default_dir)
