#!/usr/bin/env python3
"""
Deterministic mock evidence generator for GEC verification mode.
Produces stable, bounded GES scores from (qid, docid) pairs without LLM inference.
"""

import hashlib
import json
import random
from typing import Dict, List, Tuple, Any


class MockEvidenceGenerator:
    """Mock generator that produces deterministic evidence scores.

    Every value is derived from an MD5 hash of ``qid``, ``docid`` and the
    instance ``seed``, so repeated calls with the same arguments and seed
    return the same result.
    """

    def __init__(self, seed: int = 42):
        """Store *seed* and create the instance RNG seeded with it."""
        self.seed = seed
        self.rng = random.Random(seed)

    def hash_qid_docid(self, qid: str, docid: str) -> int:
        """Generate deterministic hash from query-document pair.

        Returns the first 8 hex digits of the MD5 digest of
        ``"{qid}:{docid}:{seed}"`` as an integer (0 <= value < 2**32).
        """
        combined = f"{qid}:{docid}:{self.seed}"
        return int(hashlib.md5(combined.encode()).hexdigest()[:8], 16)

    def generate_portfolio_pack(self, qid: str, documents: List[Dict], pack_config: str) -> Dict[str, Any]:
        """Generate mock portfolio synthesis pack with citations.

        Args:
            qid: Query identifier.
            documents: Dicts that each carry a ``'docid'`` key.
            pack_config: Pack label; echoed into the result and into
                ``synthesis_text``.

        Returns:
            Dict with keys ``qid``, ``pack_config``, ``citations``
            (docid -> count in 0..3), ``best_document`` and
            ``synthesis_text``. When ``documents`` is empty, ``citations``
            is ``{}`` and ``best_document`` is ``None``.

        Raises:
            KeyError: If a document has no ``'docid'`` key.
        """
        doc_ids = [doc['docid'] for doc in documents]

        if not doc_ids:
            # Nothing to cite; without this guard max() below would raise
            # ValueError on an empty sequence.
            return {
                "qid": qid,
                "pack_config": pack_config,
                "citations": {},
                "best_document": None,
                "synthesis_text": f"Mock synthesis for {qid} (pack {pack_config})"
            }

        # NOTE(review): the citation counts depend only on (qid, docid, seed),
        # so every pack_config yields the same citations for a given query.
        citations: Dict[str, int] = {}
        doc_hashes: Dict[str, int] = {}

        for docid in doc_ids:
            doc_hash = self.hash_qid_docid(qid, docid)
            doc_hashes[docid] = doc_hash
            # Re-seed per document so the draws do not depend on list order.
            self.rng.seed(doc_hash)

            # Bias toward higher relevance docs (simulate real evidence patterns)
            relevance_signal = (doc_hash % 100) / 100.0
            cite_prob = 0.1 + 0.4 * relevance_signal

            if self.rng.random() < cite_prob:
                citations[docid] = self.rng.randint(1, 3)
            else:
                citations[docid] = 0

        # Ensure at least one document gets cited
        if sum(citations.values()) == 0:
            fallback_doc = max(doc_hashes, key=doc_hashes.get)
            citations[fallback_doc] = 1

        # Select BEST_DOCUMENT: citation count plus a hash-derived bonus of
        # up to 9.9, so the bonus can outweigh the citation count.
        best_doc = max(citations, key=lambda d: citations[d] + 0.1 * (doc_hashes[d] % 100))

        return {
            "qid": qid,
            "pack_config": pack_config,
            "citations": citations,
            "best_document": best_doc,
            "synthesis_text": f"Mock synthesis for {qid} (pack {pack_config})"
        }

    def generate_multi_ges_scores(self, qid: str, documents: List[Dict],
                                pack_configs: List[str]) -> Dict[str, float]:
        """Generate Multi-GES scores aggregated across portfolio packs.

        Each pack contributes ``0.7 * citations + 0.3 * best_indicator`` per
        document, weighted by the pack's position (1.0, 0.8, 0.6, 0.5, 0.4,
        then 0.3 for any further pack). The totals are divided by the largest
        total, so the top document scores 1.0 whenever any total is positive.

        Args:
            qid: Query identifier.
            documents: Dicts that each carry a ``'docid'`` key.
            pack_configs: Pack labels, in weighting order.

        Returns:
            Mapping of docid to normalized score; ``{}`` when ``documents``
            is empty, all zeros when ``pack_configs`` is empty.
        """
        doc_ids = [doc['docid'] for doc in documents]
        pack_weights = [1.0, 0.8, 0.6, 0.5, 0.4]

        # Initialize scores
        ges_scores = {docid: 0.0 for docid in doc_ids}
        if not ges_scores:
            return ges_scores

        for i, pack_config in enumerate(pack_configs):
            pack_result = self.generate_portfolio_pack(qid, documents, pack_config)
            weight = pack_weights[i] if i < len(pack_weights) else 0.3

            for docid in doc_ids:
                citation_count = pack_result["citations"].get(docid, 0)
                best_bonus = 1.0 if docid == pack_result["best_document"] else 0.0

                # Multi-GES formula: α * citations + β * best_document_indicator
                pack_score = 0.7 * citation_count + 0.3 * best_bonus
                ges_scores[docid] += weight * pack_score

        # Normalize scores to the [0, 1] range
        max_score = max(ges_scores.values())
        if max_score > 0:
            for docid in ges_scores:
                ges_scores[docid] = ges_scores[docid] / max_score

        return ges_scores


def generate_mock_synthesis_output(qid: str, documents: List[Dict],
                                 pack_configs: List[str] = None, seed: int = 42) -> Dict:
    """
    Main interface for mock evidence generation.

    Builds one portfolio pack per entry in ``pack_configs`` (defaulting to
    "1-12", "13-40" and "41-100" when ``None`` is passed), computes the
    Multi-GES scores over the same packs, and bundles both with run metadata.
    Per the original author's note, the layout is meant to match the real
    portfolio synthesis output (not verifiable from this file).
    """
    configs = ["1-12", "13-40", "41-100"] if pack_configs is None else pack_configs

    mock = MockEvidenceGenerator(seed=seed)

    # One pack result per configured pack, in the order given.
    packs = [
        mock.generate_portfolio_pack(qid, documents, config)
        for config in configs
    ]

    # Aggregate the per-pack evidence into normalized per-document scores.
    scores = mock.generate_multi_ges_scores(qid, documents, configs)

    run_metadata = {
        "generator": "mock_ges_v1.0",
        "seed": seed,
        "pack_configs": configs,
        "document_count": len(documents)
    }

    return {
        "qid": qid,
        "portfolio_packs": packs,
        "multi_ges_scores": scores,
        "metadata": run_metadata
    }


if __name__ == "__main__":
    # Smoke run: generate mock output for three sample documents and
    # pretty-print it as JSON.
    sample_documents = [
        {"docid": "D001", "text": "Sample document 1"},
        {"docid": "D002", "text": "Sample document 2"},
        {"docid": "D003", "text": "Sample document 3"}
    ]

    synthesis = generate_mock_synthesis_output("Q001", sample_documents)
    rendered = json.dumps(synthesis, indent=2)
    print(rendered)