import os
import sys
import time
import math
import json
import logging
from typing import Dict, List, Any

# Configure process-wide logging: INFO level, timestamped records.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Prepend the project directory to sys.path before importing from futuremind.
# NOTE(review): the path is relative, so it resolves against the current
# working directory of the process — confirm this is intended.
project_root = "./futuremind"
sys.path.insert(0, project_root)
logger.info("Added to PYTHONPATH: %s", project_root)

# Project imports are guarded so a broken environment is logged before the
# ImportError propagates to the caller.
try:
    from futuremind.tool.base import BaseTool
    from futuremind.tool.tools.search.google_search import get_search_handler
except ImportError as import_err:
    logger.error("Import failed: %s", import_err)
    raise

# Build the shared search handler once at import time; any failure is logged
# and re-raised unchanged.
try:
    search_handler = get_search_handler(
        config_path="../search/config",
        config_name="eval_search",
    )
except Exception as init_err:
    logger.error("Failed to initialize handlers: %s", init_err)
    raise

class ParallelSearchTool(BaseTool):
    """Tool that runs one or more web searches per request through ``search_handler``.

    Each request is a dict carrying a non-empty ``"queries"`` list. All queries
    of all requests in a batch are flattened, sent to
    ``search_handler.batch_google_search_formatted`` in chunks, and the results
    are then regrouped per request into a single text blob.
    """

    name = "Parallel_Search"
    description = """
You should invoke the Parallel_Search Tool (google) whenever the user’s query falls into one of the following categories:
1. Your internal knowledge base and training data are insufficient to answer the question accurately.
2. The user asks about a specific example, product, or piece of information that you can retrieve in greater detail via the web.
3. The question involves the latest data, dynamic information, or any knowledge that postdates your training cutoff and requires real-time updates.
4. The answer exists in external knowledge sources you cannot directly access; you must search to retrieve it.
5. Although you possess general knowledge of the topic, an online search would yield more detailed or up-to-date information (e.g. current buzzwords or trending topics).
6. You encounter an unfamiliar term or concept and must avoid fabrication by verifying it through the search tool.
7. You need to consult a product manual or official specification to support your response.

The search tool supports both parallel and sequential queries:
1. If multiple searches are independent, you may issue them in parallel.
2. If queries depend on each other (i.e. require ordered steps), perform them sequentially.
"""
    parameters = {
        "type": "object",
        "properties": {
            # Supports one or more queries
            "queries": {
                "type": "array",
                "items": { "type": "string" },
                "description": (
                    "List of search keywords:\n"
                    "- Parallel search: supply multiple keywords at once;\n"
                    "- Iterative search: supply a single-element array."
                ),
                "examples": [
                    { "queries": ["SU7 Ultra official price", "Tesla Model S latest price"] },
                    { "queries": ["Mishi wolffin fish namer"] }
                ]
            }
        },
        "required": ["queries"]
    }

    def __init__(self):
        super().__init__()

    def execute(self, args: Dict[str, Any]) -> Dict[str, Any]:
        """Run a single request by delegating to :meth:`batch_execute`.

        Args:
            args: Request dict; must contain a non-empty ``"queries"`` list.

        Returns:
            A dict with ``"content"`` (str) and ``"success"`` (bool).
        """
        results = self.batch_execute([args])
        if results and isinstance(results, list):
            return results[0]
        return {"content": "Batch execution did not return any valid results.", "success": False}

    def batch_execute(self, args_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Run several requests in one pass and return one output per request.

        A request whose ``"queries"`` value is missing, empty, or not a list
        gets a validation error in its own slot; the remaining requests are
        still searched.

        Args:
            args_list: Request dicts, each expected to hold a ``"queries"`` list.

        Returns:
            A list aligned with ``args_list``. Each item has ``"content"`` (the
            formatted results, or an error message) and ``"success"``.
            ``"success"`` is False for invalid requests and for requests in
            which every query failed.
        """
        # None marks a request that failed validation.
        queries_list: List[Any] = []
        for args in args_list:
            qs = args.get("queries") if isinstance(args, dict) else None
            queries_list.append(qs if isinstance(qs, list) and qs else None)

        flat_queries = [q for qs in queries_list if qs is not None for q in qs]
        flat_results, flat_errors = self._search_in_chunks(flat_queries)

        outputs: List[Dict[str, Any]] = []
        cursor = 0
        for qs in queries_list:
            if qs is None:
                outputs.append({"content": "The 'queries' parameter must be a non-empty list.", "success": False})
                continue
            stop = cursor + len(qs)
            outputs.append(self._format_request(qs, flat_results[cursor:stop], flat_errors[cursor:stop]))
            cursor = stop
        return outputs

    def _search_in_chunks(self, flat_queries: List[Any]):
        """Send ``flat_queries`` to the search handler in fixed-size chunks.

        Returns:
            ``(results, errors)``: two lists with exactly one entry per query.
            ``errors[i]`` is None when query ``i`` was answered, otherwise the
            message of the exception that failed its chunk.
        """
        chunk_size = 100
        pause_seconds = 10  # pause between chunks; the log message reports this value
        total = len(flat_queries)
        num_chunks = math.ceil(total / chunk_size)
        results: List[Any] = []
        errors: List[Any] = []

        for idx_chunk in range(num_chunks):
            start = idx_chunk * chunk_size
            chunk = flat_queries[start:start + chunk_size]
            logger.info(
                "Batch-search chunk %d/%d, queries %d-%d of %d.",
                idx_chunk + 1, num_chunks, start, start + len(chunk) - 1, total,
            )
            try:
                chunk_results = list(search_handler.batch_google_search_formatted(chunk, topk=5))
            except Exception as exc:
                # Best effort: a failed chunk must not abort the other chunks.
                logger.error("Chunk %d failed: %s", idx_chunk + 1, exc)
                results.extend({} for _ in chunk)
                errors.extend([str(exc)] * len(chunk))
            else:
                if len(chunk_results) != len(chunk):
                    # Keep one slot per query so later requests stay aligned.
                    logger.warning(
                        "Chunk %d returned %d results for %d queries; padding/truncating.",
                        idx_chunk + 1, len(chunk_results), len(chunk),
                    )
                    missing = len(chunk) - len(chunk_results)
                    chunk_results = chunk_results[:len(chunk)] + [{}] * missing
                results.extend(chunk_results)
                errors.extend([None] * len(chunk))
            if idx_chunk < num_chunks - 1:
                logger.info("Sleeping %ds to respect rate limit...", pause_seconds)
                time.sleep(pause_seconds)

        return results, errors

    @staticmethod
    def _format_request(qs: List[Any], results: List[Any], errors: List[Any]) -> Dict[str, Any]:
        """Render the results of one request as numbered text sections.

        Dict hits found under each result's ``"information"`` list are copied
        and tagged with a ``doc_index`` that counts up across the whole
        request; the handler's own objects are left untouched.
        NOTE(review): assumes "information" is a list of dict hits — confirm
        against the search handler. Other shapes are rendered as-is.
        """
        lines = []
        doc_counter = 1
        for idx, (query, res, err) in enumerate(zip(qs, results, errors), start=1):
            if err is not None:
                hits: Any = f"Search failed: {err}"
            else:
                hits = res.get("information", []) if isinstance(res, dict) else []
                if isinstance(hits, list):
                    numbered = []
                    for hit in hits:
                        if isinstance(hit, dict):
                            hit = {**hit, "doc_index": doc_counter}
                            doc_counter += 1
                        numbered.append(hit)
                    hits = numbered
            lines.append(f"{idx}. Query:{query} -> The corresponding search results are presented below:\n{hits}")
        # The request counts as successful if at least one query was answered.
        succeeded = any(err is None for err in errors)
        return {"content": "\n\n".join(lines), "success": succeeded}

if __name__ == "__main__":
    # Manual smoke test: run one single-query request and pretty-print the output.
    demo_request = {"queries": ["The Very Best of Cher release year"]}
    demo_output = ParallelSearchTool().batch_execute([demo_request])
    print(json.dumps(demo_output, indent=2, ensure_ascii=False))
