import os
import math
import threading
import time
from typing import List, Dict, Any
import sys

from core.clients.openrouter_client import OpenRouterClient
from core.components.inner_monologue_manager import MonologueManager
from core.ablations.talker_ablation import TalkerAblation

class MirrorThreadsOnly:
    """
    Ablation variant of MIRROR that only uses the Inner Monologue threads,
    bypassing the Cognitive Controller synthesis step.

    Each call to ``process_user_input`` answers with the talker, handing it
    the raw thread outputs produced by the most recently completed background
    pass, and then queues a snapshot of the conversation so that a background
    worker thread can compute thread outputs for the next turn.
    """

    # Longest time (seconds) a new turn waits for in-flight background
    # thinking before answering with whatever insights are available.
    INSIGHT_WAIT_SECONDS = 0.5

    # Multiplier applied to the whitespace-separated word count to get a
    # rough token estimate.
    TOKENS_PER_WORD = 1.3

    def __init__(self, 
                 api_key: str = None,
                 model: str = "openai/gpt-4o"
                 ):
        """
        Initialize the Threads-Only Mirror system.

        Args:
            api_key: OpenRouter API key (defaults to environment variable)
            model: The OpenRouter model identifier string to use

        Raises:
            ValueError: If no API key is passed and OPENROUTER_API_KEY is
                not set (or is empty) in the environment.
        """
        # Get API key from environment if not provided
        if not api_key:
            api_key = os.environ.get("OPENROUTER_API_KEY")
            if not api_key:
                raise ValueError("OPENROUTER_API_KEY environment variable must be set")

        # Store the model identifier
        self.model = model
        print(f"INFO: Initializing Mirror (Threads Only) with model: {self.model}", flush=True)

        # Conversation turns as {"role": ..., "content": ...} dicts
        self.conversation_history = []

        # One client is shared by the monologue manager and the talker
        self.client = OpenRouterClient(api_key)
        self.monologue_manager = MonologueManager(self.client, model=self.model)
        self.talker = TalkerAblation(
            client=self.client,
            model=self.model
        )

        # Number of completed turns
        self.turn_count = 0

        # Thread outputs handed to the talker on the next turn, plus the
        # most recent raw thread outputs; both guarded by insights_lock.
        self.current_insights = None
        self.last_thread_outputs = None
        self.insights_lock = threading.Lock()

        # Background worker state. The queue holds conversation snapshots
        # waiting to be processed and is guarded by queue_lock.
        self.processing_thread = None
        self.background_queue = []
        self.queue_lock = threading.Lock()

        # True while a worker has committed to draining the queue. Set and
        # cleared under queue_lock so that "is a worker going to see my
        # item?" can be answered without relying on Thread.is_alive(), which
        # stays True for a moment after the worker has decided to exit.
        self._worker_running = False

        # Bumped by reset_conversation(); background results computed for an
        # older generation are discarded instead of being stored.
        self._generation = 0

    @staticmethod
    def _estimate_tokens(text: str) -> float:
        """Roughly estimate the token count of *text* from its word count."""
        return len(text.split()) * 1.3

    def truncate_conversation_history(self, conversation_history: List[Dict[str, str]], max_tokens: int = 20000) -> List[Dict[str, str]]:
        """
        Truncate conversation history to fit within token limits.

        Token counts are estimated as ``word_count * 1.3``. When the whole
        history fits, the input list itself is returned. Otherwise a new list
        is returned holding the longest run of the most recent turns whose
        estimated total stays within ``max_tokens``; this is an empty list
        when the newest turn alone exceeds the budget.

        Args:
            conversation_history: Turns as dicts with a ``'content'`` string.
            max_tokens: Estimated-token budget for the returned history.

        Returns:
            The (possibly shortened) history in chronological order.
        """
        estimates = [self._estimate_tokens(turn['content']) for turn in conversation_history]

        if sum(estimates) <= max_tokens:
            return conversation_history

        # Walk from the newest turn backwards, collecting turns until the
        # next one would exceed the budget, then restore chronological order.
        kept_newest_first = []
        running_tokens = 0
        for turn, turn_tokens in zip(reversed(conversation_history), reversed(estimates)):
            if running_tokens + turn_tokens > max_tokens:
                break
            kept_newest_first.append(turn)
            running_tokens += turn_tokens

        kept_newest_first.reverse()
        return kept_newest_first

    def process_user_input(self, user_input: str) -> str:
        """
        Process user input and generate a response using only threads.

        Appends the user turn to the history, waits briefly (at most
        ``INSIGHT_WAIT_SECONDS``) for any in-flight background thinking,
        asks the talker for a response using the currently stored thread
        outputs, appends the response to the history, and queues a snapshot
        of the updated history for background thinking.

        Args:
            user_input: The user's message for this turn.

        Returns:
            The response produced by the talker.
        """
        print(f"\n--- Turn {self.turn_count + 1} ---", flush=True)

        # Add user input to conversation history
        self.conversation_history.append({
            "role": "user",
            "content": user_input
        })

        # Give in-flight background processing a short, bounded chance to
        # finish so this turn can use its insights.
        worker = self.processing_thread
        if worker is not None and worker.is_alive():
            worker.join(timeout=self.INSIGHT_WAIT_SECONDS)

        # Read the insights only after the wait so the freshest value is used
        with self.insights_lock:
            current_insights = self.current_insights

        # Generate response using ablation talker with thread outputs
        response = self.talker.respond(
            self.conversation_history.copy(),
            thread_outputs=current_insights
        )

        # Update conversation history
        self.conversation_history.append({
            "role": "assistant", 
            "content": response
        })

        # Schedule background processing for next turn
        conversation_history_snapshot = self.conversation_history.copy()

        with self.queue_lock:
            self.background_queue.append(conversation_history_snapshot)
            worker = self.processing_thread
            # Reuse the existing worker only if it is still committed to
            # draining the queue. A worker that has already cleared the
            # flag (or died) will never see the item we just appended.
            worker_will_drain = (
                self._worker_running
                and worker is not None
                and worker.is_alive()
            )
            if not worker_will_drain:
                self._worker_running = True
                self.processing_thread = threading.Thread(
                    target=self._process_queue_worker,
                    daemon=True
                )
                self.processing_thread.start()

        # Increment turn counter
        self.turn_count += 1

        return response

    def _process_queue_worker(self):
        """Background worker: process queued snapshots until none remain."""
        while True:
            with self.queue_lock:
                if not self.background_queue:
                    # Announce the exit while still holding the lock so a
                    # concurrent producer knows it must start a new worker.
                    self._worker_running = False
                    return
                conversation_history_snapshot = self.background_queue.pop(0)
                # reset_conversation() empties the queue and bumps the
                # generation under this lock, so the generation read here
                # is the one the snapshot was queued under.
                generation = self._generation

            self._run_background_thinking(conversation_history_snapshot, generation)

    def process_background_thinking(self, conversation_history_snapshot: List[Dict[str, str]], timeout=120.0):
        """
        Process background thinking using only Inner Monologue threads.
        This bypasses the Cognitive Controller synthesis.

        The snapshot is truncated, passed to the monologue manager, and the
        ``"results"`` entry of what it returns is stored as both
        ``last_thread_outputs`` and ``current_insights``. If processing
        raises, ``current_insights`` is set to ``None``. Results are dropped
        if ``reset_conversation`` runs while this call is in progress.

        Args:
            conversation_history_snapshot: Conversation turns to think about.
            timeout: Value reported in the start-up log line. It is not
                enforced by this method.
        """
        with self.insights_lock:
            generation = self._generation
        self._run_background_thinking(conversation_history_snapshot, generation, timeout)

    def _run_background_thinking(self, conversation_history_snapshot, generation, timeout=120.0):
        """Compute thread outputs for a snapshot and store them if *generation* is still current."""
        start_time = time.time()
        print(f"Starting background thinking (Threads Only) (timeout: {timeout}s)", flush=True)

        try:
            # Truncate conversation history
            truncated_conversation = self.truncate_conversation_history(conversation_history_snapshot)

            # Process through monologue manager
            monologue_result = self.monologue_manager.process_user_input(truncated_conversation)
            thread_results = monologue_result.get("results", [])
        except Exception as e:
            print(f"Error in background thinking: {e}", flush=True)
            with self.insights_lock:
                if generation == self._generation:
                    self.current_insights = None
            return

        # Store thread outputs directly (no synthesis) before any display
        # work, so that a failure while printing cannot discard them.
        with self.insights_lock:
            is_current = generation == self._generation
            if is_current:
                self.last_thread_outputs = thread_results
                self.current_insights = thread_results

        if not is_current:
            # The conversation was reset while this pass was running; its
            # results belong to the previous conversation.
            print("Discarding background thinking results from before conversation reset", flush=True)
            return

        # Display is best-effort: the banner uses non-ASCII characters that
        # some consoles cannot encode.
        try:
            if thread_results:
                self._print_thread_summary(thread_results)
            processing_time = time.time() - start_time
            print(f"Background thinking (Threads Only) completed in {processing_time:.2f}s", flush=True)
        except Exception as e:
            print(f"Error displaying background thinking summary: {e}", flush=True)

    @staticmethod
    def _print_thread_summary(thread_results):
        """Print the notice that synthesis is bypassed plus per-thread output sizes."""
        print("\n╔══════════════════════════════════════════════════════════════╗")
        print("║          THREADS-ONLY ABLATION (NO SYNTHESIS)                ║")
        print("╚══════════════════════════════════════════════════════════════╝")
        print("\n⚠️  Bypassing Cognitive Controller - Raw threads will be passed to Talker")
        print(f"📤 Number of threads: {len(thread_results)}")
        for thread in thread_results:
            print(f"   • {thread.get('name', 'Unknown')}: {len(thread.get('output', ''))} chars")
        print("─" * 60 + "\n", flush=True)

    def reset_conversation(self):
        """
        Reset the conversation state.

        Clears the history, the turn counter, the stored insights and raw
        thread outputs, and any snapshots still waiting in the background
        queue. Background thinking that is already running is not
        interrupted, but its results are discarded when it finishes.
        """
        print("\nResetting conversation state...")
        self.conversation_history = []
        self.turn_count = 0
        # Lock order is queue_lock -> insights_lock; no other code path
        # acquires them in the opposite order.
        with self.queue_lock:
            self.background_queue.clear()
            with self.insights_lock:
                self._generation += 1
                self.current_insights = None
                self.last_thread_outputs = None
        print("Conversation reset complete.", flush=True)