from meta_researcher.tool.tools.search_engine.log import logger
import os

import aiohttp
from asyncache import cached as acached
from cachetools import TTLCache

from meta_researcher.tool.tools.search_engine.base_search import SearchConfig
from meta_researcher.tool.tools.search_engine.website_crawler.base_crawler import BaseContentFetch, FetchResult, FetchResultMeta


class JinaContentFetch(BaseContentFetch):
    """Fetch a web page as markdown through the Jina Reader endpoint (``r.jina.ai``).

    Successful responses are memoised in a ``TTLCache`` (at most 100 entries,
    600 second time-to-live). The cache object is created once in the class
    body, so it is shared by every instance of this class.
    """

    def __init__(self, args: SearchConfig):
        """Store the search config and resolve the Jina API token.

        The token is read from ``JINA_API_TOKEN``, falling back to
        ``JINAAI_API_KEY``.

        Args:
            args: Search configuration; kept on the instance as ``self.args``.

        Raises:
            ValueError: If neither environment variable holds a non-empty value.
        """
        self.args = args
        # Credentials must come from the environment only. A literal fallback
        # token would leak a secret into source control and would also make
        # the validation below impossible to trigger.
        self.jina_api_token = os.getenv("JINA_API_TOKEN") or os.getenv("JINAAI_API_KEY")
        if not self.jina_api_token:
            raise ValueError("Missing JINA_API_TOKEN environment variable")

    @acached(cache=TTLCache(maxsize=100, ttl=600))
    async def _fetch_markdown(self, url: str) -> str:
        """Return the reader's markdown rendering of *url*, raising on any failure.

        Caching lives on this helper rather than on ``fetch`` because the
        helper raises instead of returning an error result: a call that raises
        stores nothing, so a transient failure is retried on the next call
        instead of being replayed until the entry expires.

        Raises:
            aiohttp.ClientResponseError: If the reader answers with a status
                of 400 or above.
            Exception: Any other error raised by aiohttp while connecting or
                reading the body.
        """
        headers = {
            "Authorization": f"Bearer {self.jina_api_token}",
            "X-Return-Format": "markdown",
        }
        async with aiohttp.ClientSession() as session:
            async with session.get(f"https://r.jina.ai/{url}", headers=headers) as response:
                # Without this check an error payload (rate limit, bad token,
                # upstream failure) would be handed back as page content.
                response.raise_for_status()
                return await response.text()

    async def fetch(self, url: str, query: str | None = None) -> FetchResult:
        """Fetch *url* via Jina Reader and wrap the outcome in a ``FetchResult``.

        This method does not propagate ``Exception`` subclasses: failures are
        logged as warnings and reported through ``meta.error`` with empty
        content.

        Args:
            url: Address of the page to fetch; appended to the reader base URL.
            query: Optional query recorded in the result metadata. It is not
                sent to the reader.

        Returns:
            A ``FetchResult`` whose ``content`` is the markdown text on
            success, or ``""`` with ``meta.error`` set on failure.
        """
        meta = FetchResultMeta(
            url=url,
            query=query,
            crawler="jina",
        )
        try:
            content = await self._fetch_markdown(url)
        except Exception as e:
            # Some exceptions (e.g. asyncio.TimeoutError) have an empty str();
            # fall back to repr() so the failure is never recorded as "".
            error = str(e) or repr(e)
            logger.warning(f"{error}")
            meta.error = error
            return FetchResult(content="", meta=meta)
        return FetchResult(content=content, meta=meta)
