import os
import json
import requests
from typing import Union, List
from qwen_agent.tools.base import BaseTool
from concurrent.futures import ThreadPoolExecutor


# API key sent to the search service in the X-AK header. Read once at import
# time from the IDEALAB_SEARCH_KEY environment variable; None when it is unset.
GOOGLE_SEARCH_KEY = os.environ.get("IDEALAB_SEARCH_KEY")


class Scholar(BaseTool):
    """Tool that sends one or more queries to a Google Scholar search endpoint.

    Each query is POSTed to the configured search URL and the ``organic``
    entries of the JSON reply are rendered as a numbered, markdown-style list.
    """

    name = "google_scholar"
    description = "Leverage Google Scholar to retrieve relevant information from academic publications. Accepts multiple queries."
    parameters = {
        "type": "object",
        "properties": {
            "query": {
                "type": "array",
                "items": {"type": "string", "description": "The search query."},
                "minItems": 1,
                "description": "The list of search queries for Google Scholar."
            },
        },
        "required": ["query"],
    }

    @staticmethod
    def _format_scholar_entry(idx: int, page: dict) -> str:
        """Render one ``organic`` result as a numbered markdown-style entry.

        Optional fields (``year``, ``publicationInfo``, ``snippet``,
        ``pdfUrl``, ``citedBy``) are included only when present in ``page``.

        Raises:
            KeyError: if ``page`` has no ``title`` key.
        """
        date_published = ""
        if "year" in page:
            date_published = "\nDate published: " + str(page["year"])

        publication_info = ""
        if "publicationInfo" in page:
            publication_info = "\npublicationInfo: " + page["publicationInfo"]

        snippet = ""
        if "snippet" in page:
            snippet = "\n" + page["snippet"]

        # Only the PDF link is surfaced (the original note mentions htmlUrl as
        # an alternative field).
        link_info = "no available link"
        if "pdfUrl" in page:
            link_info = "pdfUrl: " + page["pdfUrl"]

        cited_by = ""
        if "citedBy" in page:
            cited_by = "\ncitedBy: " + str(page["citedBy"])

        entry = f"{idx}. [{page['title']}]({link_info}){publication_info}{date_published}{cited_by}\n{snippet}"
        # TODO: this cleanup step was flagged by the original author as needing a rework.
        return entry.replace("Your browser can't play this video.", "")

    def google_scholar(self, query: str):
        """Run a single query and return ``(text, success)``.

        The request is attempted up to five times; any exception raised while
        sending it or decoding the JSON body triggers another attempt.

        Args:
            query: The search string to send.

        Returns:
            A ``(message, ok)`` tuple. ``ok`` is True when the reply contained
            an ``organic`` list that could be formatted (even an empty one);
            otherwise ``message`` explains the failure and ``ok`` is False.

        Raises:
            Exception: if the reply decoded as JSON but its HTTP status is
                not 200.
        """
        url = "YOUR_URL_HERE"  # placeholder: must be replaced with the real endpoint
        headers = {
            'X-AK': GOOGLE_SEARCH_KEY,
            'Content-Type': 'application/json',
        }
        data = {
            "query": query,
            "num": 10,
            "extendParams": {
                "country": "us",
                "locale": "en-us",
                "location": "United States",
                "page": 1
            },
            "platformInput": {
                "model": "google-search"
            }
        }

        max_attempts = 5
        response = None
        results = None
        for _ in range(max_attempts):
            try:
                response = requests.post(url, headers=headers, data=json.dumps(data), timeout=100)
                results = response.json()
                break
            except Exception as e:
                # Best-effort retry: report the failure and try again.
                print(e)
        else:
            # The loop never hit ``break``: every attempt failed.
            return "Google Scholar Timeout, return None, Please try again later.", False

        if response.status_code != 200:
            raise Exception(f"Error: {response.status_code} - {response.text}")

        try:
            # A missing "data"/"originalOutput"/"organic" key or a malformed
            # entry makes the whole query count as "no results".
            organic = results["data"]["originalOutput"]["organic"]
            web_snippets = [
                self._format_scholar_entry(idx, page)
                for idx, page in enumerate(organic, start=1)
            ]
        except Exception:
            return f"No results found for '{query}'. Try with a more general query.", False

        content = f"A Google scholar for '{query}' found {len(web_snippets)} results:\n\n## Scholar Results\n" + "\n\n".join(web_snippets)
        return content, True

    def call(self, params: Union[str, dict], **kwargs) -> Union[str, bool]:
        """Execute the tool for a single query string or a list of queries.

        Args:
            params: Tool arguments (JSON string or dict) containing a
                ``query`` field holding either one string or a list of strings.

        Returns:
            A ``(text, success)`` tuple (note: the annotation is kept from the
            original signature). For a list of queries the per-query texts are
            joined with ``"\\n=======\\n"`` and ``success`` is True only when
            every query succeeded; an empty list yields ``("", False)``.

        Raises:
            AssertionError: if the IDEALAB_SEARCH_KEY environment variable
                was not set when the module was imported.
            Exception: propagated from ``google_scholar`` on a non-200 reply.
        """
        assert GOOGLE_SEARCH_KEY is not None, "Please set the IDEALAB_SEARCH_KEY environment variable."
        invalid_request = "[google_scholar] Invalid request format: Input must be a JSON object containing 'query' field"
        try:
            params = self._verify_json_format_args(params)
            query = params["query"]
        except Exception:
            return invalid_request, False

        if isinstance(query, str):
            # A single query already yields the (text, success) pair; the
            # list-style aggregation below must not be applied to a bare bool.
            return self.google_scholar(query)

        if not isinstance(query, list):
            return invalid_request, False

        with ThreadPoolExecutor(max_workers=3) as executor:
            # Run the searches in parallel on a thread pool.
            tool_response = list(executor.map(self.google_scholar, query))

        if not tool_response:
            return "", False

        response = "\n=======\n".join(text for text, _ in tool_response)
        return response, all(ok for _, ok in tool_response)

if __name__ == "__main__":
    # Manual smoke test: send one sample query through the tool and print the
    # (text, success) result. A second query such as
    # "Ordered structures and partitions" can be appended to the list.
    sample_params = {"query": ["On the weakness of an ordered set"]}
    scholar_tool = Scholar()
    print(scholar_tool.call(sample_params))
