import json
import sys
from urllib.parse import urljoin
import time 
from argparse import Namespace
import re

from Tea.exceptions import TeaException
from alibabacloud_iqs20241111 import models
from alibabacloud_iqs20241111.client import Client
from alibabacloud_tea_openapi import models as open_api_models
from typing import List, Dict, Any

def quark_search(query) -> List[Dict[str, Any]]:
    """
    Run a unified search through the Alibaba Cloud IQS API.

    :param query: Search keywords, e.g. "杭州美食". Only the first 100
        characters are sent to the service.
    :return: A list with one dict per returned page item (a copy of the
        item's attribute dict). The list is empty when ``query`` is empty
        or ``None``, when the response carries no page items, or when the
        SDK raises ``TeaException`` (the exception is printed, not raised).
    """
    # Treat None like "" so the slice below cannot raise TypeError.
    if not query:
        return []
    query = query[:100]

    def create_client() -> Client:
        """Build an IQS client bound to the cn-zhangjiakou endpoint."""
        config = open_api_models.Config(
            # NOTE: placeholder credentials. Managing the AK/SK through
            # environment variables is the recommended approach.
            access_key_id="xxx",
            access_key_secret="xxx"
        )
        config.endpoint = 'iqs.cn-zhangjiakou.aliyuncs.com'
        return Client(config)

    client = create_client()
    search_request = models.UnifiedSearchRequest(
        body=models.UnifiedSearchInput(
            query=query,
            time_range='NoLimit',
            contents=models.RequestContents(
                summary=True,
                main_text=True,
            )
        )
    )

    # Keep the try body limited to the call that talks to the service.
    try:
        response = client.unified_search(search_request)
    except TeaException as e:
        print(e)
        return []

    # A missing body or page_items is treated as "no results" instead of
    # failing on iteration.
    body = getattr(response, "body", None)
    page_items = getattr(body, "page_items", None) or []

    # Return copies of the attribute dicts so callers cannot mutate the
    # SDK model objects through the returned values.
    return [dict(vars(item)) for item in page_items]

def deep_search(search_query, top_k=5, use_jina=False, jina_api_key="empty", bing_subscription_key="xxxxx", bing_endpoint="xxxxx/search"):
    """
    Search for ``search_query`` and format the top results as one string.

    :param search_query: Query string handed to ``quark_search``.
    :param top_k: Maximum number of search results included in the output.
    :param use_jina: Not used by this function; kept so existing callers
        keep working.
    :param jina_api_key: Not used by this function; kept for compatibility.
    :param bing_subscription_key: Not used by this function; kept for
        compatibility.
    :param bing_endpoint: Not used by this function; kept for compatibility.
    :return: One entry per result, concatenated, each of the form
        ``"搜索结果{n}-标题：{title}\\n内容：{summary}\\n"`` with tag-like
        ``<...>`` spans removed from the summary. Returns an empty string
        when the search fails or yields no results.
    """
    try:
        results = quark_search(search_query)
    except Exception as e:
        # Best-effort: report the failure and continue with no results.
        print(f"Error during search query '{search_query}': {e}")
        results = []

    # NOTE: the banner text mentions bing, but the results come from
    # quark_search; the message is kept unchanged for log compatibility.
    print("--------------------------------get bing search result--------------------------------")

    # Compiled once, outside the loop; strips tag-like "<...>" spans.
    tag_pattern = re.compile(r'<[^<]+?>')

    entries = []
    for index, item in enumerate(results[:top_k], start=1):
        # `.get(key, '')` only covers a missing key. A key that is present
        # with value None would reach the regex and raise TypeError, so
        # falsy values are coerced to ''.
        title = item.get('title') or ''
        summary = item.get('summary') or ''
        clean_summary = tag_pattern.sub('', summary)
        entries.append(f"搜索结果{index}-标题：{title}\n内容：{clean_summary}\n")

    return "".join(entries)


if __name__ == "__main__":
    # Manual smoke test: run one sample query and print the formatted
    # search results between separator lines.
    question = "1954-莱纳斯·卡尔·鲍林"
    result = deep_search(question)

    separator = '-------------------------------------'
    print(separator)
    print(result)
    print(separator)
