from typing import Dict, List, Optional, Any, ClassVar
from pydantic import BaseModel, Field
import hashlib
import random
from src.tools.web_search import SearchResult


class QueryAnalysis(BaseModel):
    """Structured form of the query analysis produced by the TaskClassifier.

    All five fields are required. ``SearchResultGenerator.execute`` builds an
    instance by unpacking the raw ``query_analysis`` dict it is given.
    """

    query_type: str = Field(
        ...,
        description="Type of query (factual, comparison, listing, etc.)",
    )
    topic_domain: str = Field(
        ...,
        description="Domain of the topic (biography, science, history, etc.)",
    )
    key_entities: List[str] = Field(
        ...,
        description="Key entities identified in the query",
    )
    suggested_sources: List[str] = Field(
        ...,
        description="Suggested data source types",
    )
    complexity_level: str = Field(
        ...,
        description="Query complexity level",
    )


class SearchResultGenerator(BaseModel):
    """Generate realistic search results based on query analysis and optional gold answer guidance."""
    
    # Pydantic configuration: allow field types that pydantic has no built-in
    # validator for.
    model_config = {"arbitrary_types_allowed": True}
    
    # Tool identity. Both values are emitted unchanged by to_param().
    name: str = "search_result_generator"
    description: str = """Generate realistic, domain-appropriate search results based on query analysis.
    Creates authentic-looking URLs, titles, and descriptions that match real search engine results.
    Supports gold answer guidance to naturally lead towards correct answers."""
    
    # JSON-Schema-style description of the arguments accepted by execute();
    # emitted unchanged by to_param(). Property names mirror execute()'s
    # keyword parameters, and the nested "query_analysis" properties mirror
    # the fields of QueryAnalysis.
    parameters: dict = {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The original search query"
            },
            "query_analysis": {
                "type": "object",
                "description": "Analysis results from TaskClassifier containing query type, domain, entities, etc.",
                "properties": {
                    "query_type": {"type": "string"},
                    "topic_domain": {"type": "string"}, 
                    "key_entities": {"type": "array", "items": {"type": "string"}},
                    "suggested_sources": {"type": "array", "items": {"type": "string"}},
                    "complexity_level": {"type": "string"}
                }
            },
            "num_results": {
                "type": "integer",
                "description": "Number of search results to generate",
                "default": 5
            },
            # NOTE(review): the declared type is "string" but the default is
            # None -- confirm that schema consumers accept a null default.
            "gold_answer": {
                "type": "string",
                "description": "Optional gold answer to guide result generation (never revealed directly)",
                "default": None
            },
            "include_content": {
                "type": "boolean", 
                "description": "Whether to include content snippets",
                "default": False
            }
        },
        "required": ["query", "query_analysis"]
    }

    # Hostnames to draw results from, keyed by topic domain. A domain that is
    # not listed here falls back to the "general" entry (see
    # _get_domain_sources). List order matters: the result builders index and
    # slice these lists positionally.
    DOMAIN_SOURCES: ClassVar[Dict[str, List[str]]] = {
        "biography": [
            "en.wikipedia.org",
            "www.biography.com", 
            "www.britannica.com",
            "www.findagrave.com",
            "www.imdb.com"
        ],
        "science": [
            "en.wikipedia.org",
            "www.britannica.com",
            "www.nature.com",
            "scholar.google.com",
            "www.sciencedirect.com"
        ],
        "history": [
            "en.wikipedia.org",
            "www.britannica.com",
            "www.history.com",
            "www.nationalarchives.gov",
            "www.loc.gov"
        ],
        "geography": [
            "en.wikipedia.org",
            "www.britannica.com", 
            "www.worldatlas.com",
            "www.cia.gov",
            "www.nationalgeographic.com"
        ],
        "technology": [
            "en.wikipedia.org",
            "www.britannica.com",
            "stackoverflow.com",
            "github.com",
            "developer.mozilla.org"
        ],
        # The only domain without a Britannica entry.
        "entertainment": [
            "www.imdb.com",
            "en.wikipedia.org",
            "www.rottentomatoes.com",
            "variety.com",
            "www.hollywoodreporter.com"
        ],
        "general": [
            "en.wikipedia.org",
            "www.britannica.com",
            "www.merriam-webster.com",
            "www.dictionary.com",
            "www.reference.com"
        ]
    }

    async def execute(
        self,
        query: str,
        query_analysis: Dict[str, Any],
        num_results: int = 5,
        gold_answer: Optional[str] = None,
        include_content: bool = False,
        **kwargs
    ) -> List[SearchResult]:
        """
        Produce synthetic search results tailored to the analysed query.

        The raw analysis dict is validated into a ``QueryAnalysis``, the
        source list for its topic domain is looked up, and the builder that
        matches the query type is invoked. Unrecognised query types use the
        general builder.

        Args:
            query: The search query
            query_analysis: Analysis from TaskClassifier (unpacked into
                ``QueryAnalysis``)
            num_results: Number of results to generate
            gold_answer: Optional gold answer for guidance
            include_content: Whether to include content snippets
            **kwargs: Accepted and ignored.

        Returns:
            List of SearchResult objects from the selected builder
        """
        parsed = QueryAnalysis(**query_analysis)
        domain_sources = self._get_domain_sources(parsed.topic_domain)

        # Query type -> builder; several type names share one builder.
        builders = {
            "factual": self._generate_factual_results,
            "biographical": self._generate_factual_results,
            "comparison": self._generate_comparison_results,
            "listing": self._generate_listing_results,
            "enumeration": self._generate_listing_results,
            "definition": self._generate_definition_results,
        }
        build = builders.get(parsed.query_type, self._generate_general_results)

        return build(query, parsed, domain_sources, num_results, gold_answer, include_content)

    def _get_domain_sources(self, domain: str) -> List[str]:
        """Get appropriate sources for the domain."""
        return self.DOMAIN_SOURCES.get(domain, self.DOMAIN_SOURCES["general"])

    def _generate_factual_results(
        self,
        query: str,
        analysis: QueryAnalysis,
        sources: List[str],
        num_results: int,
        gold_answer: Optional[str],
        include_content: bool,
    ) -> List[SearchResult]:
        """Generate results for factual/biographical queries.

        Encyclopedia pages come first (Wikipedia, then Britannica) when the
        domain's source list contains them; the remaining sources fill the
        rest of the page in list order.

        Args:
            query: The search query.
            analysis: Parsed query analysis; the first key entity (if any)
                names the subject of the encyclopedia pages.
            sources: Hostnames available for the topic domain.
            num_results: Maximum number of results to return.
            gold_answer: Optional gold answer used only to shape descriptions.
            include_content: Whether to populate ``raw_content``.

        Returns:
            At most ``num_results`` results, with positions numbered
            consecutively from 1. Fewer are returned only when ``sources``
            runs out.
        """
        results: List[SearchResult] = []
        if num_results <= 0:
            return results

        has_entity = bool(analysis.key_entities)

        # Wikipedia result (usually first)
        if "en.wikipedia.org" in sources:
            entity = analysis.key_entities[0] if has_entity else "Topic"
            wiki_title = entity.replace(" ", "_")
            results.append(SearchResult(
                position=len(results) + 1,
                url=f"https://en.wikipedia.org/wiki/{wiki_title}",
                title=f"{entity} - Wikipedia",
                description=self._generate_description(query, entity, "wikipedia", gold_answer),
                source="google",
                raw_content=self._generate_content(query, "wikipedia", gold_answer) if include_content else None
            ))

        # Britannica result; position follows whatever was actually added
        # before it instead of being assumed to be 2.
        if "www.britannica.com" in sources and len(results) < num_results:
            entity = analysis.key_entities[0] if has_entity else "topic"
            results.append(SearchResult(
                position=len(results) + 1,
                url=f"https://www.britannica.com/topic/{entity.replace(' ', '-').lower()}",
                title=f"{entity} | Facts, History & Information | Britannica",
                description=self._generate_description(query, entity, "britannica", gold_answer),
                source="google",
                raw_content=self._generate_content(query, "britannica", gold_answer) if include_content else None
            ))

        # Domain-specific results. The number of open slots is derived from
        # what was really appended above: a domain without Britannica (e.g.
        # "entertainment") must not lose a slot to the missing entry.
        remaining_sources = [
            s for s in sources if s not in ("en.wikipedia.org", "www.britannica.com")
        ]
        for source in remaining_sources[:num_results - len(results)]:
            results.append(
                self._generate_domain_result(
                    query, analysis, source, len(results) + 1, gold_answer, include_content
                )
            )

        return results

    def _generate_comparison_results(
        self,
        query: str,
        analysis: QueryAnalysis,
        sources: List[str],
        num_results: int,
        gold_answer: Optional[str],
        include_content: bool,
    ) -> List[SearchResult]:
        """Generate results for comparison queries.

        When the analysis names at least two entities, each of the first two
        gets its own Wikipedia page (each described relative to the other).
        Any remaining slots are filled from ``sources``, cycling through the
        list by slot index.

        Args:
            query: The search query.
            analysis: Parsed query analysis; the first two key entities are
                the items being compared.
            sources: Hostnames available for the topic domain.
            num_results: Number of results to return.
            gold_answer: Optional gold answer used only to shape descriptions.
            include_content: Whether to populate ``raw_content``.

        Returns:
            ``num_results`` results with positions numbered consecutively from
            1 (an empty list when ``num_results`` is not positive). If
            ``sources`` is empty only the Wikipedia pages are returned.
        """
        results: List[SearchResult] = []
        if num_results <= 0:
            return results

        entities = analysis.key_entities[:2]  # the two entities being compared

        if len(entities) == 2:
            first, second = entities
            for subject, counterpart in ((first, second), (second, first)):
                if len(results) >= num_results:
                    break
                results.append(SearchResult(
                    position=len(results) + 1,
                    url=f"https://en.wikipedia.org/wiki/{subject.replace(' ', '_')}",
                    title=f"{subject} - Wikipedia",
                    description=self._generate_comparison_description(subject, counterpart, "wikipedia", gold_answer),
                    source="google",
                    raw_content=self._generate_content(query, "wikipedia", gold_answer) if include_content else None
                ))

        # Nothing to cycle through; also avoids a modulo-by-zero below.
        if not sources:
            return results

        # Start from the number of results already present so that positions
        # stay contiguous even when no Wikipedia pages could be produced.
        for slot in range(len(results), num_results):
            source = sources[slot % len(sources)]
            results.append(
                self._generate_domain_result(
                    query, analysis, source, slot + 1, gold_answer, include_content
                )
            )

        return results

    def _generate_listing_results(
        self,
        query: str,
        analysis: QueryAnalysis,
        sources: List[str],
        num_results: int,
        gold_answer: Optional[str],
        include_content: bool,
    ) -> List[SearchResult]:
        """Generate results for listing/enumeration queries.

        The first result is a Wikipedia "List of ..." page for the first key
        entity (or "items" when none was identified). The remaining slots are
        filled from ``sources``, cycling through the list by slot index.

        Args:
            query: The search query.
            analysis: Parsed query analysis.
            sources: Hostnames available for the topic domain.
            num_results: Number of results to return.
            gold_answer: Optional gold answer used only to shape descriptions.
            include_content: Whether to populate ``raw_content``.

        Returns:
            ``num_results`` results with positions numbered from 1 (an empty
            list when ``num_results`` is not positive). If ``sources`` is
            empty only the Wikipedia list page is returned.
        """
        results: List[SearchResult] = []
        # Honour a request for zero results instead of always emitting the
        # Wikipedia list page.
        if num_results <= 0:
            return results

        # Wikipedia list page
        topic = analysis.key_entities[0] if analysis.key_entities else "items"
        results.append(SearchResult(
            position=1,
            url=f"https://en.wikipedia.org/wiki/List_of_{topic.replace(' ', '_')}",
            title=f"List of {topic} - Wikipedia",
            description=self._generate_listing_description(topic, gold_answer),
            source="google",
            raw_content=self._generate_content(query, "wikipedia", gold_answer) if include_content else None
        ))

        # Nothing to cycle through; also avoids a modulo-by-zero below.
        if not sources:
            return results

        # Add other sources
        for slot in range(1, num_results):
            source = sources[slot % len(sources)]
            results.append(
                self._generate_domain_result(
                    query, analysis, source, slot + 1, gold_answer, include_content
                )
            )

        return results

    def _generate_definition_results(
        self,
        query: str,
        analysis: QueryAnalysis,
        sources: List[str],
        num_results: int,
        gold_answer: Optional[str],
        include_content: bool,
    ) -> List[SearchResult]:
        """Generate results for definition queries.

        The first result is a Merriam-Webster dictionary entry and the second
        a Wikipedia article, both for the first key entity (or "term" when
        none was identified). The remaining slots are filled from ``sources``,
        cycling through the list by slot index.

        Args:
            query: The search query.
            analysis: Parsed query analysis.
            sources: Hostnames available for the topic domain.
            num_results: Number of results to return.
            gold_answer: Optional gold answer used only to shape descriptions.
            include_content: Whether to populate ``raw_content``.

        Returns:
            ``num_results`` results with positions numbered from 1 (an empty
            list when ``num_results`` is not positive). If ``sources`` is
            empty only the dictionary/Wikipedia results are returned.
        """
        from urllib.parse import quote

        results: List[SearchResult] = []
        # Honour a request for zero results instead of always emitting the
        # dictionary entry.
        if num_results <= 0:
            return results

        term = analysis.key_entities[0] if analysis.key_entities else "term"

        # Dictionary result. The term is percent-encoded as a single path
        # segment: spaces still become %20, and characters such as "/", "?"
        # or "#" can no longer break the URL.
        results.append(SearchResult(
            position=1,
            url=f"https://www.merriam-webster.com/dictionary/{quote(term, safe='')}",
            title=f"Definition of {term} - Merriam-Webster",
            description=self._generate_definition_description(term, gold_answer),
            source="google",
            raw_content=self._generate_content(query, "dictionary", gold_answer) if include_content else None
        ))

        # Wikipedia result
        if num_results > 1:
            results.append(SearchResult(
                position=2,
                url=f"https://en.wikipedia.org/wiki/{term.replace(' ', '_')}",
                title=f"{term} - Wikipedia",
                description=f"Comprehensive article about {term}, including definition, history, and examples.",
                source="google",
                raw_content=self._generate_content(query, "wikipedia", gold_answer) if include_content else None
            ))

        # Nothing to cycle through; also avoids a modulo-by-zero below.
        if not sources:
            return results

        # Add more sources
        for slot in range(2, num_results):
            source = sources[slot % len(sources)]
            results.append(
                self._generate_domain_result(
                    query, analysis, source, slot + 1, gold_answer, include_content
                )
            )

        return results

    def _generate_general_results(self, query: str, analysis: QueryAnalysis, sources: List[str],
                                num_results: int, gold_answer: Optional[str], include_content: bool) -> List[SearchResult]:
        """Build one result per source, in list order, for the first ``num_results`` sources.

        Positions are numbered from 1. No source is reused, so the list is
        never longer than ``sources``.
        """
        return [
            self._generate_domain_result(query, analysis, host, rank, gold_answer, include_content)
            for rank, host in enumerate(sources[:num_results], start=1)
        ]

    def _generate_domain_result(self, query: str, analysis: QueryAnalysis, source: str, position: int,
                              gold_answer: Optional[str], include_content: bool) -> SearchResult:
        """Assemble a single SearchResult for ``source`` at the given ``position``.

        URL, title, description and (optionally) content come from the
        corresponding ``_generate_*`` helpers. The description is written
        about the first key entity, or "topic" when none was identified.
        """
        if analysis.key_entities:
            subject = analysis.key_entities[0]
        else:
            subject = "topic"

        link = self._generate_url(query, analysis, source)
        headline = self._generate_title(query, analysis, source, gold_answer)
        blurb = self._generate_description(query, subject, source, gold_answer)

        # Content is only generated on request; otherwise the field stays None.
        body = None
        if include_content:
            body = self._generate_content(query, source, gold_answer)

        return SearchResult(
            position=position,
            url=link,
            title=headline,
            description=blurb,
            source="google",
            raw_content=body
        )

    def _generate_url(self, query: str, analysis: QueryAnalysis, source: str) -> str:
        """Generate realistic URL for the source."""
        query_hash = abs(hash(query)) % 1000000
        entity = analysis.key_entities[0] if analysis.key_entities else "topic"
        
        url_patterns = {
            "www.imdb.com": f"https://www.imdb.com/name/nm{query_hash:07d}/",
            "www.biography.com": f"https://www.biography.com/people/{entity.replace(' ', '-').lower()}",
            "www.findagrave.com": f"https://www.findagrave.com/memorial/{query_hash}/",
            "scholar.google.com": f"https://scholar.google.com/citations?user={query_hash:08x}&hl=en",
            "www.nature.com": f"https://www.nature.com/articles/nature{query_hash:05d}",
            "www.history.com": f"https://www.history.com/topics/{entity.replace(' ', '-').lower()}",
            "www.cia.gov": f"https://www.cia.gov/the-world-factbook/countries/{entity.replace(' ', '-').lower()}/",
            "github.com": f"https://github.com/project/{entity.replace(' ', '-').lower()}",
        }
        
        return url_patterns.get(source, f"https://{source}/search?q={query.replace(' ', '+')}")

    def _generate_title(self, query: str, analysis: QueryAnalysis, source: str, gold_answer: Optional[str]) -> str:
        """Generate realistic title for the source."""
        entity = analysis.key_entities[0] if analysis.key_entities else "Topic"
        
        title_patterns = {
            "www.imdb.com": f"{entity} - IMDb",
            "www.biography.com": f"{entity} Biography - Early Life, Career & Legacy", 
            "www.findagrave.com": f"{entity} (Memorial) - Find a Grave",
            "www.britannica.com": f"{entity} | Biography & Facts | Britannica",
            "scholar.google.com": f"{entity} - Google Scholar",
            "www.nature.com": f"Research on {entity} | Nature",
            "www.history.com": f"{entity} - History & Facts",
        }
        
        return title_patterns.get(source, f"{entity} - {source.split('.')[1].title()}")

    def _generate_description(self, query: str, entity: str, source: str, gold_answer: Optional[str]) -> str:
        """Generate realistic description, potentially guided by gold answer."""
        base_descriptions = {
            "wikipedia": f"Comprehensive article about {entity} with biographical information, achievements, and historical context.",
            "britannica": f"Encyclopedia entry for {entity} with verified facts, timeline, and detailed background information.",
            "biography.com": f"Detailed biography of {entity} covering early life, career highlights, and lasting impact on society.",
            "imdb": f"Complete filmography and career information for {entity}, including awards, collaborations, and reviews.",
            "findagrave": f"Memorial page for {entity} with birth and death information, burial location, and biographical summary."
        }
        
        base_desc = base_descriptions.get(source, f"Detailed information about {entity} from authoritative sources.")
        
        # If gold answer provided, subtly incorporate hints
        if gold_answer:
            return self._incorporate_gold_answer_hint(base_desc, gold_answer)
        
        return base_desc

    def _generate_comparison_description(self, entity1: str, entity2: str, source: str, gold_answer: Optional[str]) -> str:
        """Generate comparison-focused description."""
        base_desc = f"Information about {entity1}, including biographical details and career timeline for comparison with {entity2}."
        
        if gold_answer:
            return self._incorporate_gold_answer_hint(base_desc, gold_answer)
            
        return base_desc

    def _generate_listing_description(self, topic: str, gold_answer: Optional[str]) -> str:
        """Generate listing-focused description."""
        base_desc = f"Comprehensive list of {topic} with detailed information, dates, and relevant facts."
        
        if gold_answer:
            return self._incorporate_gold_answer_hint(base_desc, gold_answer)
            
        return base_desc

    def _generate_definition_description(self, term: str, gold_answer: Optional[str]) -> str:
        """Generate definition-focused description."""
        base_desc = f"Clear definition and explanation of {term} with examples, etymology, and usage guidelines."
        
        if gold_answer:
            return self._incorporate_gold_answer_hint(base_desc, gold_answer)
            
        return base_desc

    def _incorporate_gold_answer_hint(self, base_desc: str, gold_answer: str) -> str:
        """Subtly incorporate hints from gold answer without revealing it directly."""
        # Extract key elements from gold answer for natural incorporation
        if any(char.isdigit() for char in gold_answer):  # Contains dates/numbers
            return base_desc + " Includes specific dates and numerical information."
        elif "," in gold_answer:  # Multiple items or complex answer
            return base_desc + " Contains detailed factual information with multiple data points."
        else:  # Simple answer
            return base_desc + " Provides clear, factual information with authoritative sources."

    def _generate_content(self, query: str, source: str, gold_answer: Optional[str]) -> str:
        """Generate realistic content snippet."""
        content_templates = {
            "wikipedia": "This Wikipedia article provides comprehensive coverage of the topic with citations and references.",
            "britannica": "Authoritative encyclopedia entry with verified facts and scholarly references.",
            "biography.com": "Detailed biographical information with timeline and career highlights.",
            "dictionary": "Official dictionary definition with pronunciation guide and examples."
        }
        
        base_content = content_templates.get(source, "Detailed information from authoritative sources.")
        
        if gold_answer:
            # Add subtle hints in content
            return base_content + " Key factual details are well-documented with specific information."
            
        return base_content

    def to_param(self) -> Dict:
        """Convert tool to function call format."""
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": self.parameters,
            },
        }