import os
import xml.etree.ElementTree as ET
from typing import Any, Dict, List, Literal, Optional, TypeAlias, Union
from functools import wraps
import time
import requests

from configs import logger


def retry(max_retries=3, delay=1):
    """Build a decorator that re-invokes a callable until it yields a usable result.

    A result is usable when it is truthy and is not a dict containing an
    ``"error"`` key. Plain functions and coroutine functions are both
    supported: a coroutine function gets an ``async`` wrapper that awaits
    every attempt, so exceptions raised while awaiting are seen by the retry
    loop and the pause between attempts does not block the event loop.

    Args:
        max_retries (int): Maximum number of attempts. (default: 3)
        delay (int | float): Seconds to wait between two attempts. No wait
            is performed after the final attempt. (default: 1)

    Returns:
        Callable: A decorator. The decorated callable returns the first
        usable result. When every attempt fails it returns
        ``{"error": "Max retries reached. Last error: ..."}`` if the final
        attempt raised, otherwise
        ``{"error": "Max retries reached. No valid result found."}``.
    """
    # Function-scope imports: the module does not import these at top level.
    import asyncio
    import inspect

    def _is_usable(result):
        # Falsy values (None, "", empty containers, ...) and error dicts
        # both count as a failed attempt.
        return bool(result) and not (isinstance(result, dict) and "error" in result)

    def _exhausted():
        # Built fresh on every call so callers never share one mutable dict.
        return {"error": "Max retries reached. No valid result found."}

    def decorator(func):
        if inspect.iscoroutinefunction(func):
            @wraps(func)
            async def async_wrapper(*args, **kwargs):
                for attempt in range(max_retries):
                    is_last = attempt == max_retries - 1
                    try:
                        # Await inside the try block: calling ``func`` merely
                        # creates the coroutine, the real work (and any
                        # exception) happens while it is awaited.
                        result = await func(*args, **kwargs)
                        if _is_usable(result):
                            return result
                    except Exception as e:
                        if is_last:
                            return {"error": f"Max retries reached. Last error: {str(e)}"}
                    if not is_last:
                        await asyncio.sleep(delay)
                return _exhausted()

            return async_wrapper

        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                is_last = attempt == max_retries - 1
                try:
                    result = func(*args, **kwargs)
                    if _is_usable(result):
                        return result
                except Exception as e:
                    if is_last:
                        return {"error": f"Max retries reached. Last error: {str(e)}"}
                if not is_last:
                    time.sleep(delay)
            return _exhausted()

        return wrapper

    return decorator


class CSWikipediaSearch:
    """Tool wrapper that fetches a short Wikipedia summary for an entity."""

    def __init__(self):
        # JSON schema describing the tool: its name, a description and the
        # single required string parameter ``entity``.
        self.tool_json_schema = {
            "name": 'search_wikipedia',
            "description": 'Searches Wikipedia.',
            "parameters": {
                "type": "object",
                "properties": {
                    "entity": {
                        "type": "string",
                        "description": "The topic, concept, or page title for Wikipedia."
                    }
                },
                "required": ["entity"]
            }
        }

    @retry()
    async def call_tool(self, arguments: dict, **kwargs):
        r"""Search the entity in Wikipedia and return a five-sentence summary
        of the matching page.

        Args:
            arguments (dict): Tool arguments; must contain the key
                ``"entity"`` (the entity to be searched).
            **kwargs: Accepted but not used by this method.

        Returns:
            tuple: ``(result, is_success)``. ``result`` is the summary text,
                or an ``"Error: ..."`` message when no page could be loaded.
                ``is_success`` is ``True`` only when the page for ``entity``
                itself was found; it is ``False`` when the lookup was
                ambiguous (even though the summary of the first candidate is
                returned) and when no page exists.

        Raises:
            KeyError: If ``arguments`` has no ``"entity"`` key.
        """
        import wikipedia

        entity = arguments["entity"]
        is_success = True
        try:
            # NOTE: called without ``await``, so it runs synchronously inside
            # this coroutine.
            result = wikipedia.summary(entity, sentences=5, auto_suggest=False)
        except wikipedia.exceptions.DisambiguationError as e:
            is_success = False
            try:
                # Fall back to the first candidate offered for the ambiguous
                # title.
                result = wikipedia.summary(
                    e.options[0], sentences=5, auto_suggest=False
                )
            except (IndexError, wikipedia.exceptions.WikipediaException) as fallback_error:
                # The fallback lookup can fail too (no candidates, missing
                # page, another ambiguity). An exception raised inside an
                # ``except`` clause is not seen by the sibling clauses, so it
                # is handled here and reported like the other failure paths.
                result = (
                    f"Error: The entity {entity} is ambiguous in Wikipedia and "
                    "its first candidate page could not be loaded "
                    f"({fallback_error}), please specify another word to "
                    "describe the entity to be searched."
                )
        except wikipedia.exceptions.PageError:
            result = (
                "Error: There is no page in Wikipedia corresponding to entity "
                f"{entity}, please specify another word to describe the"
                " entity to be searched."
            )
            is_success = False
        return result, is_success


class CSGoogleSearch:
    """Tool wrapper around the Google Custom Search JSON API."""

    # Upper bound, in seconds, for the HTTP request so a stalled connection
    # cannot hang the call forever.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, GOOGLE_API_KEY, SEARCH_ENGINE_ID):
        # https://developers.google.com/custom-search/v1/overview
        # https://cse.google.com/cse/all
        self.GOOGLE_API_KEY = GOOGLE_API_KEY
        self.SEARCH_ENGINE_ID = SEARCH_ENGINE_ID
        # JSON schema describing the tool: its name, a description and the
        # single required string parameter ``query``.
        self.tool_json_schema = {
            "name": 'search_google',
            "description": 'Searches Google.',
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The search term or question for Google."
                    }
                },
                "required": ["query"]
            }
        }

    @retry()
    async def call_tool(self, arguments: dict, **kwargs):
        r"""Use Google search engine to search information for the given query.

        Args:
            arguments (dict): Tool arguments; must contain the key
                ``"query"`` (the query to be searched).
            **kwargs: Accepted but not used by this method.

        Returns:
            tuple: ``(result, is_success)``.

            On success ``result`` is a list of dictionaries, one per website
            that carries page metatags, and ``is_success`` is ``True``. The
            list may be empty. Each dictionary contains the following keys:
                - 'result_id': Position of the item in the API response,
                  starting at 1 (skipped items leave gaps).
                - 'title': The title of the website.
                - 'description': A brief description of the website.
                - 'long_description': The ``og:description`` metatag, or
                  ``"N/A"`` when absent.
                - 'url': The URL of the website.

                Example:
                {
                    'result_id': 1,
                    'title': 'OpenAI',
                    'description': 'An organization focused on ensuring that
                    artificial general intelligence benefits all of humanity.',
                    'long_description': 'OpenAI is a non-profit artificial
                    intelligence research company. Our goal is to advance
                    digital intelligence in the way that is most likely to
                    benefit humanity as a whole',
                    'url': 'https://www.openai.com'
                }

            When the response has no ``"items"`` key, ``result`` is an
            ``"Error: ..."`` string embedding the response payload and
            ``is_success`` is ``False``.

        Raises:
            KeyError: If ``arguments`` has no ``"query"`` key.
        """
        import requests

        query = arguments["query"]

        # Doc: https://developers.google.com/custom-search/v1/using_rest
        # The values are handed to ``requests`` as ``params`` so that they are
        # URL-encoded; interpolating the raw query into the URL would break
        # on characters such as ``&`` or ``#``.
        params = {
            "key": self.GOOGLE_API_KEY,
            "cx": self.SEARCH_ENGINE_ID,
            "q": query,
            # Index of the first result to return (1 = start of the results).
            "start": 1,
            # Different language may get different result.
            # NOTE(review): the API reference lists ``lr`` values in the form
            # ``lang_xx``; "en" is kept as originally sent — confirm.
            "lr": "en",
            # How many results to request.
            "num": 3,
            "gl": "us",
        }

        # NOTE: a synchronous HTTP call; it is not awaited.
        http_response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        data = http_response.json()

        if "items" not in data:
            return f"Error: Google search failed. 'items' not in data: {data}", False

        responses = []
        for i, search_item in enumerate(data["items"], start=1):
            # Only items that carry page metatags are reported.
            if "pagemap" not in search_item:
                continue
            metatags = search_item["pagemap"].get("metatags")
            if not metatags:
                continue
            long_description = metatags[0].get("og:description", "N/A")

            responses.append(
                {
                    "result_id": i,
                    "title": search_item.get("title"),
                    "description": search_item.get("snippet"),
                    "long_description": long_description,
                    "url": search_item.get("link"),
                }
            )

        # If no item qualified, an empty list is returned.
        # logger.info(f'Google search execute result = {responses}')
        return responses, True


class CSWolframAlphaSearch:
    """Tool wrapper that queries Wolfram|Alpha and structures its answer."""

    # Upper bound, in seconds, for the step-by-step HTTP request so a stalled
    # connection cannot hang the call forever.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, WOLFRAMALPHA_APP_ID):
        # https://products.wolframalpha.com/api
        self.WOLFRAMALPHA_APP_ID = WOLFRAMALPHA_APP_ID
        # JSON schema describing the tool: a required string ``query`` and an
        # optional boolean ``is_detailed``.
        self.tool_json_schema = {
            "name": 'search_wolfram_alpha',
            "description": 'Solves complex math, answers factual questions, and gets real-time data from Wolfram Alpha.',
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The question or calculation for Wolfram Alpha."
                    },
                    "is_detailed": {
                        "type": "boolean",
                        "description": "If `true`, requests a detailed, step-by-step solution. Defaults to `false`."
                    }
                },
                "required": ["query"]
            }
        }

    @retry()
    async def call_tool(self, arguments: dict, **kwargs):
        r"""Queries Wolfram|Alpha and returns the result. Wolfram|Alpha is an
        answer engine developed by Wolfram Research. It is offered as an online
        service that answers factual queries by computing answers from
        externally sourced data.

        Args:
            arguments (dict): Tool arguments. ``"query"`` (str, required) is
                the query to send to Wolfram Alpha. ``"is_detailed"`` (bool,
                optional) selects whether additional details including step
                by step information are included. (default: :obj:`False`)
            **kwargs: Accepted but not used by this method.

        Returns:
            tuple: ``(result, is_success)``; ``is_success`` is always
                ``True`` here because failures surface as exceptions.
                ``result`` is the final answer (the description of the
                primary pod, or ``None`` when no pod is marked primary) if
                ``is_detailed`` is false, otherwise the full parsed
                dictionary with an extra ``"steps"`` entry.

        Raises:
            KeyError: If ``arguments`` has no ``"query"`` key.
        """
        import wolframalpha

        query = arguments["query"]
        is_detailed = arguments.get("is_detailed", False)

        # No local exception handling: errors raised by the client propagate
        # out of this coroutine.
        client = wolframalpha.Client(self.WOLFRAMALPHA_APP_ID)
        res = await client.aquery(query)

        parsed_result = self._parse_wolfram_result(res)

        if not is_detailed:
            return parsed_result["final_answer"], True

        parsed_result["steps"] = self._get_wolframalpha_step_by_step_solution(
            self.WOLFRAMALPHA_APP_ID, query
        )
        return parsed_result, True

    def _parse_wolfram_result(self, result) -> Dict[str, Any]:
        r"""Parses a Wolfram Alpha API result into a structured dictionary
        format.

        Args:
            result: The API result returned from a Wolfram Alpha
                query, read as a mapping with an ``"@inputstring"`` entry
                and a ``"pod"`` entry holding the pods.

        Returns:
            dict: A dictionary with the keys ``"query"`` (the original
                query), ``"pod_info"`` (one ``title`` / ``description`` /
                ``image_url`` dict per pod) and ``"final_answer"`` (the
                description of the pod flagged ``@primary``, else ``None``).
        """
        output = {
            "query": result.get("@inputstring", ""),
            "pod_info": [],
            "final_answer": None,
        }

        pods = result.get("pod", [])
        if isinstance(pods, dict):
            # A lone pod given as a single mapping instead of a list;
            # iterating it directly would walk its keys, so wrap it.
            pods = [pods]

        for pod in pods:
            subpod_data = pod.get("subpod", {})
            if isinstance(subpod_data, list):
                # Use the first subpod that has both 'plaintext' and 'img'.
                description, image = next(
                    (
                        (data["plaintext"], data["img"])
                        for data in subpod_data
                        if "plaintext" in data and "img" in data
                    ),
                    ("", ""),
                )
                # Reduce the image entry to its source URL so that
                # ``image_url`` has the same meaning as in the single-subpod
                # branch below.
                image_url = image.get("@src", "") if isinstance(image, dict) else image
            else:
                # A single subpod is given as one mapping.
                description = subpod_data.get("plaintext", "")
                image_url = subpod_data.get("img", {}).get("@src", "")

            pod_info = {
                "title": pod.get("@title", ""),
                "description": description,
                "image_url": image_url,
            }

            # For the Results pod, join the plaintext of every subpod.
            if pod.get("@title") == "Results":
                if isinstance(subpod_data, list):
                    results_text = [
                        subpod["plaintext"]
                        for subpod in subpod_data
                        if subpod.get("plaintext")
                    ]
                else:
                    results_text = [description] if description else []
                pod_info["description"] = "\n".join(results_text)

            output["pod_info"].append(pod_info)

            # The primary pod supplies the final answer (its first
            # description, not the joined Results text).
            if pod.get("@primary", False):
                output["final_answer"] = description

        return output

    def _get_wolframalpha_step_by_step_solution(
        self, app_id: str, query: str
    ) -> dict:
        r"""Retrieve a step-by-step solution from the Wolfram Alpha API for a
        given query.

        Args:
            app_id (str): Your Wolfram Alpha API application ID.
            query (str): The mathematical or computational query to solve.

        Returns:
            dict: The extracted steps keyed ``"step1"``, ``"step2"``, ... in
                document order; empty when no step subpod was found.
        """
        params = {
            "appid": app_id,
            "input": query,
            "podstate": ["Result__Step-by-step solution", "Show all steps"],
            "format": "plaintext",
        }

        # NOTE: a synchronous HTTP call made with the module-level
        # ``requests`` import.
        response = requests.get(
            "https://api.wolframalpha.com/v2/query",
            params=params,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        root = ET.fromstring(response.text)

        # Collect the text of every 'SBSStep' / 'SBSHintStep' subpod found
        # inside the 'Results' pod.
        steps = []
        for subpod in root.findall(".//pod[@title='Results']//subpod"):
            content_type = subpod.find("stepbystepcontenttype")
            if content_type is None or content_type.text not in (
                "SBSStep",
                "SBSHintStep",
            ):
                continue
            plaintext = subpod.find("plaintext")
            if plaintext is not None and plaintext.text:
                # Remove 'Hint: |' if present.
                steps.append(plaintext.text.strip().replace("Hint: |", "").strip())

        return {f"step{i}": step for i, step in enumerate(steps, start=1)}


# class SearchToolkit:
#     r"""A class representing a toolkit for web search.

#     This class provides methods for searching information on the web using
#     search engines like Google, DuckDuckGo, Wikipedia and Wolfram Alpha, Brave.
#     """

#     @retry()
#     def search_linkup(
#             self,
#             query: str,
#             depth: Literal["standard", "deep"] = "standard",
#             output_type: Literal[
#                 "searchResults", "sourcedAnswer", "structured"
#             ] = "searchResults",
#             structured_output_schema: Optional[str] = None,
#     ) -> Dict[str, Any]:
#         r"""Search for a query in the Linkup API and return results in various
#         formats.

#         Args:
#             query (str): The search query.
#             depth (Literal["standard", "deep"]): The depth of the search.
#                 "standard" for a straightforward search, "deep" for a more
#                 comprehensive search.
#             output_type (Literal["searchResults", "sourcedAnswer",
#                 "structured"]): The type of output:
#                 - "searchResults" for raw search results,
#                 - "sourcedAnswer" for an answer with supporting sources,
#                 - "structured" for output based on a provided schema.
#             structured_output_schema (Optional[str]): If `output_type` is
#                 "structured", specify the schema of the output. Must be a
#                 string representing a valid object JSON schema.

#         Returns:
#             Dict[str, Any]: A dictionary representing the search result. The
#                 structure depends on the `output_type`. If an error occurs,
#                 returns an error message.
#         """
#         try:
#             from linkup import LinkupClient

#             # Initialize the Linkup client with the API key
#             LINKUP_API_KEY = os.getenv("LINKUP_API_KEY")
#             client = LinkupClient(api_key=LINKUP_API_KEY)

#             # Perform the search using the specified output_type
#             response = client.search(
#                 query=query,
#                 depth=depth,
#                 output_type=output_type,
#                 structured_output_schema=structured_output_schema,
#             )

#             if output_type == "searchResults":
#                 results = [
#                     item.__dict__
#                     for item in response.__dict__.get('results', [])
#                 ]
#                 return {"results": results}

#             elif output_type == "sourcedAnswer":
#                 answer = response.__dict__.get('answer', '')
#                 sources = [
#                     item.__dict__
#                     for item in response.__dict__.get('sources', [])
#                 ]
#                 return {"answer": answer, "sources": sources}

#             elif output_type == "structured" and structured_output_schema:
#                 return response.__dict__

#             else:
#                 return {"error": f"Invalid output_type: {output_type}"}

#         except Exception as e:
#             return {"error": f"An unexpected error occurred: {e!s}"}

#     @retry()
#     def search_duckduckgo(
#             self, query: str, source: str = "text", max_results: int = 5
#     ) -> List[Dict[str, Any]]:
#         r"""Use DuckDuckGo search engine to search information for
#         the given query.

#         This function queries the DuckDuckGo API for related topics to
#         the given search term. The results are formatted into a list of
#         dictionaries, each representing a search result.

#         Args:
#             query (str): The query to be searched.
#             source (str): The type of information to query (e.g., "text",
#                 "images", "videos"). Defaults to "text".
#             max_results (int): Max number of results, defaults to `5`.

#         Returns:
#             List[Dict[str, Any]]: A list of dictionaries where each dictionary
#                 represents a search result.
#         """
#         from duckduckgo_search import DDGS
#         from requests.exceptions import RequestException

#         ddgs = DDGS()
#         responses: List[Dict[str, Any]] = []

#         if source == "text":
#             try:
#                 results = ddgs.text(keywords=query, max_results=max_results)
#             except RequestException as e:
#                 # Handle specific exceptions or general request exceptions
#                 responses.append({"error": f"duckduckgo search failed.{str(e)}"})

#             # Iterate over results found
#             for i, result in enumerate(results, start=1):
#                 # Creating a response object with a similar structure
#                 response = {
#                     "result_id": i,
#                     "title": result["title"],
#                     "description": result["body"],
#                     "url": result["href"],
#                 }
#                 responses.append(response)

#         elif source == "images":
#             try:
#                 results = ddgs.images(keywords=query, max_results=max_results)
#             except RequestException as e:
#                 # Handle specific exceptions or general request exceptions
#                 responses.append({"error": f"duckduckgo search failed.{str(e)}"})

#             # Iterate over results found
#             for i, result in enumerate(results, start=1):
#                 # Creating a response object with a similar structure
#                 response = {
#                     "result_id": i,
#                     "title": result["title"],
#                     "image": result["image"],
#                     "url": result["url"],
#                     "source": result["source"],
#                 }
#                 responses.append(response)

#         elif source == "videos":
#             try:
#                 results = ddgs.videos(keywords=query, max_results=max_results)
#             except RequestException as e:
#                 # Handle specific exceptions or general request exceptions
#                 responses.append({"error": f"duckduckgo search failed.{str(e)}"})

#             # Iterate over results found
#             for i, result in enumerate(results, start=1):
#                 # Creating a response object with a similar structure
#                 response = {
#                     "result_id": i,
#                     "title": result["title"],
#                     "description": result["description"],
#                     "embed_url": result["embed_url"],
#                     "publisher": result["publisher"],
#                     "duration": result["duration"],
#                     "published": result["published"],
#                 }
#                 responses.append(response)

#         # If no answer found, return an empty list
#         return responses

#     @retry()
#     def search_brave(
#             self,
#             q: str,
#             country: str = "US",
#             search_lang: str = "en",
#             ui_lang: str = "en-US",
#             count: int = 20,
#             offset: int = 0,
#             safesearch: str = "moderate",
#             freshness: Optional[str] = None,
#             text_decorations: bool = True,
#             spellcheck: bool = True,
#             result_filter: Optional[str] = None,
#             goggles_id: Optional[str] = None,
#             units: Optional[str] = None,
#             extra_snippets: Optional[bool] = None,
#             summary: Optional[bool] = None,
#     ) -> Dict[str, Any]:
#         r"""This function queries the Brave search engine API and returns a
#         dictionary, representing a search result.
#         See https://api.search.brave.com/app/documentation/web-search/query
#         for more details.

#         Args:
#             q (str): The user's search query term. Query cannot be empty.
#                 Maximum of 400 characters and 50 words in the query.
#             country (str): The search query country where results come from.
#                 The country string is limited to 2 character country codes of
#                 supported countries. For a list of supported values, see
#                 Country Codes. (default: :obj:`US`)
#             search_lang (str): The search language preference. The 2 or more
#                 character language code for which search results are provided.
#                 For a list of possible values, see Language Codes.
#             ui_lang (str): User interface language preferred in response.
#                 Usually of the format '<language_code>-<country_code>'. For
#                 more, see RFC 9110. For a list of supported values, see UI
#                 Language Codes.
#             count (int): The number of search results returned in response.
#                 The maximum is 20. The actual number delivered may be less than
#                 requested. Combine this parameter with offset to paginate
#                 search results.
#             offset (int): The zero based offset that indicates number of search
#                 results per page (count) to skip before returning the result.
#                 The maximum is 9. The actual number delivered may be less than
#                 requested based on the query. In order to paginate results use
#                 this parameter together with count. For example, if your user
#                 interface displays 20 search results per page, set count to 20
#                 and offset to 0 to show the first page of results. To get
#                 subsequent pages, increment offset by 1 (e.g. 0, 1, 2). The
#                 results may overlap across multiple pages.
#             safesearch (str): Filters search results for adult content.
#                 The following values are supported:
#                 - 'off': No filtering is done.
#                 - 'moderate': Filters explicit content, like images and videos,
#                     but allows adult domains in the search results.
#                 - 'strict': Drops all adult content from search results.
#             freshness (Optional[str]): Filters search results by when they were
#                 discovered:
#                 - 'pd': Discovered within the last 24 hours.
#                 - 'pw': Discovered within the last 7 Days.
#                 - 'pm': Discovered within the last 31 Days.
#                 - 'py': Discovered within the last 365 Days.
#                 - 'YYYY-MM-DDtoYYYY-MM-DD': Timeframe is also supported by
#                     specifying the date range e.g. '2022-04-01to2022-07-30'.
#             text_decorations (bool): Whether display strings (e.g. result
#                 snippets) should include decoration markers (e.g. highlighting
#                 characters).
#             spellcheck (bool): Whether to spellcheck provided query. If the
#                 spellchecker is enabled, the modified query is always used for
#                 search. The modified query can be found in altered key from the
#                 query response model.
#             result_filter (Optional[str]): A comma delimited string of result
#                 types to include in the search response. Not specifying this
#                 parameter will return back all result types in search response
#                 where data is available and a plan with the corresponding
#                 option is subscribed. The response always includes query and
#                 type to identify any query modifications and response type
#                 respectively. Available result filter values are:
#                 - 'discussions'
#                 - 'faq'
#                 - 'infobox'
#                 - 'news'
#                 - 'query'
#                 - 'summarizer'
#                 - 'videos'
#                 - 'web'
#                 - 'locations'
#             goggles_id (Optional[str]): Goggles act as a custom re-ranking on
#                 top of Brave's search index. For more details, refer to the
#                 Goggles repository.
#             units (Optional[str]): The measurement units. If not provided,
#                 units are derived from search country. Possible values are:
#                 - 'metric': The standardized measurement system
#                 - 'imperial': The British Imperial system of units.
#             extra_snippets (Optional[bool]): A snippet is an excerpt from a
#                 page you get as a result of the query, and extra_snippets
#                 allow you to get up to 5 additional, alternative excerpts. Only
#                 available under Free AI, Base AI, Pro AI, Base Data, Pro Data
#                 and Custom plans.
#             summary (Optional[bool]): This parameter enables summary key
#                 generation in web search results. This is required for
#                 summarizer to be enabled.

#         Returns:
#             Dict[str, Any]: A dictionary representing a search result.
#         """

#         import requests

#         BRAVE_API_KEY = os.getenv("BRAVE_API_KEY")

#         url = "https://api.search.brave.com/res/v1/web/search"
#         headers = {
#             "Content-Type": "application/json",
#             "X-BCP-APIV": "1.0",
#             "X-Subscription-Token": BRAVE_API_KEY,
#         }

#         ParamsType: TypeAlias = Dict[
#             str,
#             Union[str, int, float, List[Union[str, int, float]], None],
#         ]

#         params: ParamsType = {
#             "q": q,
#             "country": country,
#             "search_lang": search_lang,
#             "ui_lang": ui_lang,
#             "count": count,
#             "offset": offset,
#             "safesearch": safesearch,
#             "freshness": freshness,
#             "text_decorations": text_decorations,
#             "spellcheck": spellcheck,
#             "result_filter": result_filter,
#             "goggles_id": goggles_id,
#             "units": units,
#             "extra_snippets": extra_snippets,
#             "summary": summary,
#         }

#         response = requests.get(url, headers=headers, params=params)
#         data = response.json()["web"]
#         return data

#     @retry()
#     def tavily_search(
#             self, query: str, num_results: int = 5, **kwargs
#     ) -> List[Dict[str, Any]]:
#         r"""Use Tavily Search API to search information for the given query.

#         Args:
#             query (str): The query to be searched.
#             num_results (int): The number of search results to retrieve
#                 (default is `5`).
#             **kwargs: Additional optional parameters supported by Tavily's API:
#                 - search_depth (str): "basic" or "advanced" search depth.
#                 - topic (str): The search category, e.g., "general" or "news."
#                 - days (int): Time frame in days for news-related searches.
#                 - max_results (int): Max number of results to return
#                   (overrides `num_results`).
#                 See https://docs.tavily.com/docs/python-sdk/tavily-search/
#                 api-reference for details.

#         Returns:
#             List[Dict[str, Any]]: A list of dictionaries representing search
#                 results. Each dictionary contains:
#                 - 'result_id' (int): The result's index.
#                 - 'title' (str): The title of the result.
#                 - 'description' (str): A brief description of the result.
#                 - 'long_description' (str): Detailed information, if available.
#                 - 'url' (str): The URL of the result.
#                 - 'content' (str): Relevant content from the search result.
#                 - 'images' (list): A list of related images (if
#                   `include_images` is True).
#                 - 'published_date' (str): Publication date for news topics
#                   (if available).
#         """
#         from tavily import TavilyClient  # type: ignore[import-untyped]

#         Tavily_API_KEY = os.getenv("TAVILY_API_KEY")
#         if not Tavily_API_KEY:
#             raise ValueError(
#                 "`TAVILY_API_KEY` not found in environment variables. "
#                 "Get `TAVILY_API_KEY` here: `https://www.tavily.com/api/`."
#             )

#         client = TavilyClient(Tavily_API_KEY)

#         try:
#             results = client.search(query, max_results=num_results, **kwargs)
#             return results
#         except Exception as e:
#             return [{"error": f"An unexpected error occurred: {e!s}"}]
