import atexit
from collections import OrderedDict
import hashlib
import json
import os
import threading
import time
from typing import Any, Dict, Optional


class WebPageCache:
    """
    A simple persistent LRU cache for webpage content, modeled after ASearcher.

    - Keys are md5(url)
    - Values store url, content, timestamp
    - Periodically saved to a JSON file preserving LRU order

    The in-memory state (``cache``, ``stats``, ``operations_since_save``) is
    guarded by ``lock``. Writes to the cache file are serialized by a separate
    internal lock and go through a temporary file followed by ``os.replace``,
    so concurrent saves cannot interleave and an interrupted save does not
    leave a truncated cache file behind.
    """

    def __init__(self, max_size: int = 100000, cache_file: str = "./webpage_cache.json", save_interval: int = 10):
        """
        Create the cache, load any existing cache file and register a save at exit.

        Args:
            max_size: Maximum number of entries kept; the least recently used
                entries are evicted first. With ``max_size <= 0`` nothing is stored.
            cache_file: Path of the JSON file used for persistence.
            save_interval: Number of ``put`` calls between background saves.
        """
        self.max_size = max_size
        self.cache_file = cache_file
        self.cache: "OrderedDict[str, Dict[str, Any]]" = OrderedDict()
        self.lock = threading.Lock()
        # Serializes file writers (background saves, force_save, atexit save).
        # Kept separate from ``lock`` so readers are not blocked during disk I/O.
        self._save_lock = threading.Lock()
        self.stats = self._empty_stats()
        self.save_interval = save_interval
        self.operations_since_save = 0

        self.load_from_file()
        atexit.register(self.save_to_file)

    @staticmethod
    def _empty_stats() -> Dict[str, int]:
        """Return a fresh, zeroed statistics dictionary."""
        return {"hits": 0, "misses": 0, "evictions": 0}

    def _generate_cache_key(self, url: str) -> str:
        """Return the hex md5 digest of ``url``, used as the cache key."""
        return hashlib.md5(url.encode()).hexdigest()

    def put(self, url: str, content: str):
        """
        Store ``content`` for ``url`` as the most recently used entry.

        Empty ``url`` or ``content`` is ignored, as is every call when
        ``max_size <= 0``. Least recently used entries are evicted to make
        room. Every ``save_interval`` stored entries a daemon thread is
        started to persist the cache.
        """
        if not url or not content:
            return
        if self.max_size <= 0:
            # A zero-capacity cache cannot hold anything; evicting from an
            # empty OrderedDict would raise KeyError.
            return

        cache_key = self._generate_cache_key(url)
        save_due = False
        with self.lock:
            # Drop any previous entry so the new one lands at the MRU end.
            self.cache.pop(cache_key, None)

            while self.cache and len(self.cache) >= self.max_size:
                self.cache.popitem(last=False)
                self.stats["evictions"] += 1

            self.cache[cache_key] = {
                "url": url,
                "content": content,
                "timestamp": time.time(),
            }

            self.operations_since_save += 1
            if self.operations_since_save >= self.save_interval:
                self.operations_since_save = 0
                save_due = True

        # Start the saver after releasing the lock: its first action is to
        # take the same lock to snapshot the cache.
        if save_due:
            threading.Thread(target=self._background_save, daemon=True).start()

    def get(self, url: str) -> Optional[str]:
        """
        Return the cached content for ``url``, or ``None`` on a miss.

        A hit marks the entry as most recently used. Hits and misses are
        counted in ``stats``.
        """
        cache_key = self._generate_cache_key(url)
        with self.lock:
            if cache_key not in self.cache:
                self.stats["misses"] += 1
                return None

            self.cache.move_to_end(cache_key)  # mark as MRU
            self.stats["hits"] += 1
            return self.cache[cache_key].get("content")

    def has(self, url: str) -> bool:
        """Return whether ``url`` is cached, without touching LRU order or stats."""
        cache_key = self._generate_cache_key(url)
        with self.lock:
            return cache_key in self.cache

    def clear(self):
        """Remove all in-memory entries and reset statistics (the file is not touched)."""
        with self.lock:
            self.cache.clear()
            self.stats = self._empty_stats()
            self.operations_since_save = 0

    def force_save(self):
        """Persist the cache immediately and restart the save-interval counter."""
        self.save_to_file()
        with self.lock:
            self.operations_since_save = 0

    def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of size, capacity, hit/miss/eviction counts and hit rate."""
        with self.lock:
            total_requests = self.stats["hits"] + self.stats["misses"]
            hit_rate = self.stats["hits"] / total_requests if total_requests > 0 else 0
            return {
                "cache_size": len(self.cache),
                "max_size": self.max_size,
                "hits": self.stats["hits"],
                "misses": self.stats["misses"],
                "evictions": self.stats["evictions"],
                "hit_rate": hit_rate,
                "total_requests": total_requests,
            }

    def _background_save(self):
        """Thread target for periodic saves; never lets an exception escape the thread."""
        try:
            self.save_to_file()
        except Exception as e:
            print(f"[ERROR] WebPageCache: Background save failed: {e}")

    def save_to_file(self):
        """
        Write the cache, in LRU order, together with the stats to ``cache_file``.

        The state is snapshotted under ``lock``; serialization and disk I/O
        happen outside it. Failures are reported on stdout and not raised, so
        this is safe to use as an ``atexit`` handler.
        """
        try:
            with self._save_lock:
                with self.lock:
                    cache_data = {
                        "cache_ordered": list(self.cache.items()),
                        # Copy so counters bumped during the dump cannot
                        # affect what is being serialized.
                        "stats": dict(self.stats),
                        "max_size": self.max_size,
                        "saved_at": time.time(),
                    }
                entry_count = len(cache_data["cache_ordered"])

                # Ensure directory exists for absolute paths like /workspace/webcache/webpage_cache.json
                dirpath = os.path.dirname(self.cache_file)
                if dirpath:
                    os.makedirs(dirpath, exist_ok=True)

                # Write to a sibling temp file and swap it in, so readers never
                # observe a half-written cache file.
                tmp_path = f"{self.cache_file}.{os.getpid()}.tmp"
                try:
                    with open(tmp_path, "w", encoding="utf-8") as f:
                        json.dump(cache_data, f, indent=2, ensure_ascii=False)
                    os.replace(tmp_path, self.cache_file)
                except BaseException:
                    # Best-effort cleanup of the partial temp file.
                    try:
                        os.remove(tmp_path)
                    except OSError:
                        pass
                    raise

            print(f"[DEBUG] WebPageCache: Saved {entry_count} entries to {self.cache_file}")

        except Exception as e:
            print(f"[ERROR] WebPageCache: Failed to save cache to {self.cache_file}: {e}")

    def load_from_file(self):
        """
        Replace the in-memory cache with the contents of ``cache_file``.

        Understands the ordered format (``"cache_ordered"``: list of
        ``[key, value]`` pairs) and the legacy format (``"cache"``: mapping).
        Missing stats counters default to zero. Entries beyond ``max_size``
        are evicted from the LRU end. A missing file leaves the cache
        untouched; an unreadable or malformed file resets cache and stats.
        """
        try:
            with open(self.cache_file, "r", encoding="utf-8") as f:
                cache_data = json.load(f)

            ordered_format = "cache_ordered" in cache_data
            if ordered_format:
                loaded = OrderedDict(cache_data["cache_ordered"])
            else:
                loaded = OrderedDict(cache_data.get("cache", {}))

            # Merge over defaults so a partial stats object cannot raise
            # KeyError below and throw away an otherwise valid cache.
            stats = self._empty_stats()
            stored_stats = cache_data.get("stats")
            if isinstance(stored_stats, dict):
                stats.update(stored_stats)

            while loaded and len(loaded) > self.max_size:
                loaded.popitem(last=False)
                stats["evictions"] += 1

            saved_at = cache_data.get("saved_at", 0)

        except FileNotFoundError:
            print(f"[DEBUG] WebPageCache: No existing cache file {self.cache_file}, starting fresh")
            return

        except Exception as e:
            print(f"[ERROR] WebPageCache: Failed to load cache from {self.cache_file}: {e}")
            with self.lock:
                self.cache = OrderedDict()
                self.stats = self._empty_stats()
            return

        with self.lock:
            self.cache = loaded
            self.stats = stats

        if ordered_format:
            print("[DEBUG] WebPageCache: Loaded ordered cache format")
        else:
            print("[DEBUG] WebPageCache: Loaded legacy cache format (LRU order may be lost)")

        # The timestamp is only used for logging; a bad value must not
        # invalidate the data that was just loaded.
        try:
            saved_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(saved_at))
        except (TypeError, ValueError, OverflowError, OSError):
            saved_time = "unknown"
        print(
            f"[DEBUG] WebPageCache: Loaded {len(loaded)} entries from {self.cache_file} (saved at {saved_time})"
        )
