import asyncio
from meta_researcher.tool.tools.search_engine.log import logger
from urllib.parse import urlparse

import httpx
from asyncache import cached as acached
from cachetools import TTLCache

from meta_researcher.tool.tools.search_engine.base_search import BaseSearch, SearchConfig


class BochaSearch(BaseSearch):
    """Search backend that queries the Bocha web-search HTTP API.

    The request parameters ``count`` and ``page`` are taken from
    ``args.topk`` and ``args.pages``; the API key and retry budget are read
    from ``self.args`` (``bocha_search_key`` and ``max_retry``), which is
    presumably stored by ``BaseSearch.__init__`` -- confirm against the base
    class.
    """

    def __init__(self, args: SearchConfig):
        super().__init__(args)
        self.topk = args.topk
        self.pages = args.pages
        self.name = "BochaSearch"

    @staticmethod
    def _extract_webpages(response) -> list:
        """Return the ``data.webPages.value`` list from a raw API payload.

        Any level that is missing or explicitly ``null`` is treated as empty,
        so the caller always gets a list and never an ``AttributeError`` from
        chaining ``.get`` on ``None``.
        """
        data = (response or {}).get("data") or {}
        web_pages = data.get("webPages") or {}
        return web_pages.get("value") or []

    async def _search(self, query: str) -> dict:
        """POST ``query`` to the Bocha endpoint and return the decoded JSON.

        Raises:
            httpx.HTTPStatusError: if the server answers with a 4xx/5xx code
                (via ``raise_for_status``).
        """
        bocha_search_url = "https://api.bochaai.com/v1/web-search"
        params = {"query": query, "summary": True, "count": self.topk, "page": self.pages}
        headers = {
            "Authorization": f"Bearer {self.args.bocha_search_key}",
            "Content-Type": "application/json",
        }
        # NOTE(review): timeout=None disables every httpx timeout, so a stalled
        # connection blocks forever and the retry loop in search() never gets a
        # chance to run. Kept as-is to avoid changing behavior for slow
        # requests; consider a finite timeout.
        async with httpx.AsyncClient(timeout=None) as client:
            response = await client.post(bocha_search_url, headers=headers, json=params)
            response.raise_for_status()
            return response.json()

    def _parse_response(self, response) -> list[dict]:
        """Convert a raw API payload into the normalized result dicts.

        Each entry carries ``url``, ``summ``, ``content``, ``title``,
        ``site_name``, ``icon``, ``published_date`` and ``meta``. Entries that
        are empty or have no ``url`` are skipped instead of aborting the whole
        response. The collected list is passed through
        ``self._filter_results`` (defined outside this class) before being
        returned.
        """
        results = []
        for webpage in self._extract_webpages(response):
            if not webpage:
                continue
            url = webpage.get("url")
            if not url:
                # Without a URL the entry cannot be referenced downstream.
                continue
            url_info = urlparse(url)
            # Fall back to the conventional favicon location / host name when
            # the API omits the field or returns it empty.
            icon = webpage.get("siteIcon") or f"{url_info.scheme}://{url_info.netloc}/favicon.ico"
            site_name = webpage.get("siteName") or url_info.netloc
            results.append(
                {
                    "url": url,
                    "summ": webpage.get("snippet", ""),
                    "content": webpage.get("summary", ""),
                    "title": webpage.get("name", ""),
                    "site_name": site_name,
                    "icon": icon,
                    "published_date": webpage.get("dateLastCrawled", ""),
                    "meta": {
                        "from": self.name,
                    },
                }
            )
        return self._filter_results(results)

    # Results are memoized in a TTL cache (at most 100 entries, 600 s each).
    @acached(cache=TTLCache(maxsize=100, ttl=600))
    async def search(self, query: str) -> list[dict]:
        """Search for ``query``, retrying up to ``self.args.max_retry`` times.

        An attempt counts as failed when the request raises, when the payload
        contains no web pages, or when parsing raises. Failed attempts are
        logged and followed by a one-second pause before the next attempt.

        Raises:
            Exception: when every attempt failed; the last underlying error is
                attached as ``__cause__``.
        """
        max_retry = self.args.max_retry
        last_error = None
        for attempt in range(max_retry):
            try:
                response = await self._search(query)
                if not self._extract_webpages(response):
                    raise ValueError(f"call {self.name} failed.")
                return self._parse_response(response)
            except Exception as e:
                last_error = e
                logger.warning(str(e))
                # No point in waiting after the final attempt.
                if attempt < max_retry - 1:
                    await asyncio.sleep(1)
        raise Exception(f"Failed to get search results from {self.name} after retries.") from last_error
