import asyncio
from meta_researcher.tool.tools.search_engine.log import logger
from meta_researcher.tool.tools.search_engine.base_search import SearchConfig
from typing import Any

from pydantic import BaseModel

class FetchResultMeta(BaseModel):
    # Metadata record referenced by FetchResult.meta. Every field is optional
    # and defaults to None, so an instance can be built with no arguments.
    # NOTE(review): nothing in the visible part of this file reads or writes
    # these fields; the per-field notes below are inferred from names and
    # types only and should be confirmed against the code that fills them in.
    raw_content: Any | None = None  # presumably the payload before processing — TODO confirm
    soup: Any | None = None  # presumably a parsed HTML tree — TODO confirm
    url: str | None = None  # presumably the URL that was requested — TODO confirm
    new_url: str | None = None  # presumably the URL after a redirect/rewrite — TODO confirm
    is_rag: bool | None = None  # meaning not evident from this file — TODO document
    special_url: str | None = None  # meaning not evident from this file — TODO document
    crawler: list[str] | None = None  # presumably names of the crawlers involved — TODO confirm
    query: str | None = None  # presumably the search query tied to this fetch — TODO confirm
    title: str | None = None  # presumably the page title — TODO confirm
    error: str | None = None  # presumably an error description when the fetch failed — TODO confirm
    fetch_spend: float | None = None  # presumably time spent fetching; units not shown — TODO confirm
    extra: dict | None = None  # free-form dict; its schema is not shown in this file


class FetchResult(BaseModel):
    # Pairs a fetched `content` value with its FetchResultMeta record.
    # Both fields are required: neither declares a default.
    content: Any  # untyped here (Any); the concrete type is decided by whoever builds the result
    meta: FetchResultMeta

class BaseContentFetch:
    """Base class for content fetchers.

    Subclasses implement :meth:`fetch`; :meth:`batch_fetch` fans a list of
    URLs out to it concurrently under a single overall time budget.
    """

    def __init__(self, args: SearchConfig):
        """Store the search configuration.

        ``batch_fetch`` reads ``args.timeout`` and ``args.max_retry`` from it.
        """
        self.args = args

    async def fetch(self, url: str, query: str | None = None) -> tuple[str, dict]:
        """Fetch a single URL. Must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    async def batch_fetch(
        self, urls: list[str], querys: str | None = None
    ) -> list[tuple[str, dict]]:
        """Fetch every URL concurrently and return one result per URL, in order.

        Each URL is passed to :meth:`fetch` together with ``querys`` (the same
        value is forwarded to every call as ``query``). All fetches share one
        time budget of ``timeout * max_retry * 0.7``.

        A fetch that raises, or that has not finished when the budget runs
        out, contributes the placeholder ``("", {})`` instead of aborting the
        batch; unfinished fetches are cancelled. One failing URL therefore no
        longer discards the results of the other URLs.

        Args:
            urls: URLs to fetch.
            querys: Optional query forwarded unchanged to every ``fetch`` call.

        Returns:
            A list with the same length and order as ``urls``.
        """
        # asyncio.wait() rejects an empty collection; keep the historical
        # behaviour of returning an empty list for empty input.
        if not urls:
            return []

        # Compute the budget before spawning tasks so that a misconfigured
        # value fails cleanly instead of leaving orphaned tasks behind.
        budget = self.args.timeout * self.args.max_retry * 0.7
        tasks = [asyncio.create_task(self.fetch(url, query=querys)) for url in urls]

        try:
            # Unlike gather(), wait() does not stop at the first failing task,
            # so a single bad URL cannot cut the other fetches short.
            await asyncio.wait(tasks, timeout=budget)
        except asyncio.CancelledError as exc:
            # NOTE(review): cancellation of the caller is deliberately
            # swallowed here, as it always has been, so that partial results
            # are still returned. Revisit if callers need it to propagate.
            logger.warning(f"fetch content gather all tasks timeout{exc}")
        else:
            if not all(task.done() for task in tasks):
                logger.warning("fetch content gather all tasks timeout")

        all_contents = []
        for task in tasks:
            if not task.done():
                logger.warning("cancel timeout task")
                task.cancel()
                all_contents.append(("", {}))
                continue
            try:
                all_contents.append(task.result())
            except (asyncio.CancelledError, Exception) as exc:
                # result() re-raises whatever the fetch raised (or
                # CancelledError if the task was cancelled); degrade that
                # single entry to the placeholder.
                logger.warning(f"cancel done task{exc}")
                all_contents.append(("", {}))
        return all_contents