#!/usr/bin/env python3
"""
Main Question Answering Script

This script provides an interactive interface for question answering with two modes:
1. Standard QA: Question only (using 32B model)
2. Enhanced QA: Question + Topological Graph (using structured knowledge)

Usage:
    python qa_main.py --question "Your question here" --mode standard
    python qa_main.py --question "Your question here" --mode enhanced
"""

import os
import sys
import asyncio
import argparse
from typing import Optional, List
from dotenv import load_dotenv

# Populate os.environ via python-dotenv before any setting is read
load_dotenv()

# Connection settings for the default (32B) text model; used as base_url / model_name
VLLM_TEXT_URL = os.getenv("VLLM_QWEN_32B_URL")
DEFAULT_TEXT_MODEL = os.getenv("VLLM_QWEN_32B_MODEL")

# Startup diagnostics: echo what was picked up so a misconfiguration is visible
for _env_report_line in (
    f"🔧 Environment check:",
    f"   VLLM_TEXT_URL: {VLLM_TEXT_URL}",
    f"   DEFAULT_TEXT_MODEL: {DEFAULT_TEXT_MODEL}",
):
    print(_env_report_line)

# Both settings are mandatory; abort with exit status 1 if either is unset or empty
if not (VLLM_TEXT_URL and DEFAULT_TEXT_MODEL):
    print("❌ Environment variables not loaded properly!")
    print("   Please check your .env file")
    sys.exit(1)

# Put this file's directory first on sys.path so the `utils` imports below resolve
sys.path.insert(0, os.path.dirname(__file__))

from utils.question_answerer import answer_question
from utils.context_retrievers import DataRetrievalContextRetriever

def check_graph_availability() -> bool:
    """Report whether topological graph data is available for enhanced mode.

    Returns:
        Always True. Enhanced mode reruns the pipeline itself, so no
        pre-existing graph has to be present and nothing is inspected here.
    """
    return True

def get_default_graph_dir() -> Optional[str]:
    """Return the default topological graph directory (for reference only).

    The candidate location is ``temp_files/topological_graph`` under the
    parent of this file's directory. The path is built with ``os.path.join``
    and is not normalized, so it still contains the ``..`` segment.

    Returns:
        The candidate path if it exists on disk, otherwise None.
    """
    script_dir = os.path.dirname(__file__)
    candidate = os.path.join(script_dir, "..", "temp_files", "topological_graph")

    if not os.path.exists(candidate):
        return None
    return candidate

def _prompt_choice(prompt, choices):
    """Prompt until the user enters one of the keys of *choices*.

    Args:
        prompt: Text shown by ``input()``.
        choices: Mapping from accepted input strings to the value to return.

    Returns:
        The mapped value, or None if the user aborted with Ctrl+C or stdin
        reached EOF (a goodbye message is printed in that case).
    """
    while True:
        try:
            entered = input(prompt).strip()
        except (KeyboardInterrupt, EOFError):
            print("\n👋 Goodbye!")
            return None
        if entered in choices:
            return choices[entered]
        print("❌ Invalid choice. Please enter 1 or 2.")


def _run_coroutine_blocking(coro):
    """Run *coro* to completion from synchronous code and return its result.

    ``asyncio.run()`` raises RuntimeError when an event loop is already
    running in the current thread, which is the case when this module's async
    ``main()`` calls ``interactive_mode()``. In that situation the coroutine
    is driven on its own event loop in a worker thread while the caller
    blocks until it finishes.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so asyncio.run() is safe to use.
        return asyncio.run(coro)

    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


def interactive_mode():
    """Interactively collect the QA settings from stdin and run the question.

    Prompts, in order, for the answering mode (standard/enhanced), the
    thinking mode, the question, and an optional output file path, then runs
    ``run_qa`` with those values and blocks until it completes.

    The function returns early (None) when the user aborts any prompt with
    Ctrl+C, when stdin is closed, or when the question is empty. It can be
    called both from plain synchronous code and from inside a running
    asyncio event loop.
    """
    print("🤖 Question Answering System")
    print("=" * 50)

    print("\nAvailable modes:")
    print("1. Standard QA (question only)")
    print("2. Enhanced QA (question + topological graph - reruns pipeline)")

    mode = _prompt_choice("\nSelect mode (1 or 2): ", {"1": "standard", "2": "enhanced"})
    if mode is None:
        return

    # Get thinking mode preference
    print(f"\n🧠 Thinking Mode:")
    print("1. With thinking (shows step-by-step reasoning)")
    print("2. Without thinking (direct answer)")

    # Both options currently map to False: thinking is disabled for now.
    # None (not False) is the abort sentinel, hence the identity check below.
    use_thinking = _prompt_choice("Select thinking mode (1 or 2): ", {"1": False, "2": False})
    if use_thinking is None:
        return

    print(f"\n📝 Mode: {mode.upper()} {'(with thinking)' if use_thinking else '(without thinking)'}")
    try:
        question = input("Enter your question: ").strip()
        if not question:
            print("❌ No question provided.")
            return

        # An empty answer means "do not save the result to a file"
        output_path = input("Output file path (optional, press Enter to skip): ").strip() or None
    except (KeyboardInterrupt, EOFError):
        print("\n👋 Goodbye!")
        return

    # Run question answering
    _run_coroutine_blocking(run_qa(question, mode, output_path, use_thinking))

def _resolve_model_config(model: str) -> dict:
    """Build the LLM connection settings for the selected model.

    Args:
        model: Model key, either "32B" or "30B-A3B".

    Returns:
        Dict with 'base_url', 'model_name', 'max_tokens' and 'temperature'.

    Raises:
        ValueError: If *model* is not a known key, or if the URL / model name
            for that model is unset or empty.
    """
    if model == "32B":
        env_names = ("VLLM_QWEN_32B_URL", "VLLM_QWEN_32B_MODEL")
        base_url, model_name = VLLM_TEXT_URL, DEFAULT_TEXT_MODEL
    elif model == "30B-A3B":
        env_names = ("VLLM_QWEN_A3B_URL", "VLLM_QWEN_A3B_MODEL")
        base_url, model_name = os.getenv(env_names[0]), os.getenv(env_names[1])
    else:
        raise ValueError(f"Unknown model: {model}")

    # Fail here with a precise message instead of passing None downstream.
    if not base_url or not model_name:
        raise ValueError(
            f"Missing configuration for model {model}: "
            f"set {env_names[0]} and {env_names[1]} in the environment"
        )

    return {
        'base_url': base_url,
        'model_name': model_name,
        'max_tokens': 2048,
        'temperature': 0.7
    }


async def run_qa(question: str, mode: str, output_path: Optional[str] = None, use_thinking: bool = False,
                 strategy: str = "dynamic", corpus_strategy: str = "strategy_1", model: str = "32B",
                 use_codebooks: bool = False, codebooks_file: Optional[str] = None, cluster_ids: Optional[List[int]] = None,
                 max_codebooks_per_cluster: Optional[int] = None, batch_size: int = 10000):
    """Run question answering in the given mode and print a result/timing report.

    In "enhanced" mode the schema induction pipeline is run first and a
    DataRetrievalContextRetriever is built from the files it is expected to
    leave under ``temp_files/`` (relative to the current working directory).
    If those files are missing, the question is answered in "standard" mode.

    Args:
        question: The question to answer.
        mode: "standard" or "enhanced".
        output_path: Optional path forwarded to ``answer_question``.
        use_thinking: Forwarded to ``answer_question``.
        strategy: Forwarded to ``answer_question``.
        corpus_strategy: Passed to the pipeline as its ``strategy``.
        model: Model key, "32B" or "30B-A3B".
        use_codebooks: Forwarded to the pipeline.
        codebooks_file: Forwarded to the pipeline.
        cluster_ids: Forwarded to the pipeline.
        max_codebooks_per_cluster: Forwarded to the pipeline.
        batch_size: Forwarded to the pipeline.

    Returns:
        Whatever ``answer_question`` returns, or None if any step raised.
        Exceptions are caught here, reported on stdout, and their traceback
        is printed; they are not propagated to the caller.
    """
    import time

    # Initialize timing variables
    start_time = time.time()
    pipeline_time = 0
    chunk_processing_time = 0
    code_generation_time = 0
    # Tracks whether the pipeline actually ran; `mode` may be downgraded to
    # "standard" later, but the time spent is still part of the total.
    pipeline_ran = False

    print("\n")
    print("=" * 60)

    try:
        # Configure model settings based on selected model
        model_config = _resolve_model_config(model)

        # Set up context retriever for enhanced mode
        context_retriever = None
        if mode == "enhanced":
            print("🚀 Enhanced mode: Running full schema induction pipeline...")

            # Imported lazily so standard mode does not need the pipeline module
            from utils.initial_iteration.schema_induction_pipeline import LLMSchemaInductionPipeline

            pipeline = LLMSchemaInductionPipeline(
                question=question,
                chunk_size=256,
                overlap=50,
                max_iterations=1,
                min_frequency=1,
                min_frequency_ratio=0.5,
                strategy=corpus_strategy,
                model=model,
                use_codebooks=use_codebooks,
                codebooks_file=codebooks_file,
                cluster_ids=cluster_ids,
                max_codebooks_per_cluster=max_codebooks_per_cluster,
                batch_size=batch_size
            )

            print("   🔄 Running schema induction pipeline...")
            pipeline_start = time.time()
            result = await pipeline.run_pipeline()
            pipeline_time = time.time() - pipeline_start
            pipeline_ran = True
            print(f"   ✅ Schema induction pipeline completed! (Time: {pipeline_time:.2f}s)")

            # Extract timing from pipeline result if available; tolerate a
            # missing result or None values rather than aborting the whole run.
            timings = result or {}
            chunk_processing_time = timings.get('build_corpus_time', 0) or 0
            code_generation_time = timings.get('code_generation_time', 0) or 0
            if chunk_processing_time > 0:
                print(f"      📊 Build corpus: {chunk_processing_time:.2f}s")
            if code_generation_time > 0:
                print(f"      📊 Code generation: {code_generation_time:.2f}s")

            # Now set up data retrieval context retriever
            embeddings_path = "temp_files/embeddings.parquet"
            topological_graph_dir = "temp_files/topological_graph"

            if os.path.exists(embeddings_path) and os.path.exists(topological_graph_dir):
                context_retriever = DataRetrievalContextRetriever(
                    embeddings_path,
                    topological_graph_dir,
                    top_k=10
                )
                print(f"   Context: Data retrieval (strategy={strategy})")
            else:
                print(f"   ⚠️ Required files not found after pipeline run, falling back to standard mode")
                mode = "standard"

        # Run question answering
        print("   🔄 Running question answering...")
        qa_start = time.time()
        answer = await answer_question(
            question=question,
            mode=mode,
            output_path=output_path,
            model_config=model_config,
            context_retriever=context_retriever,
            use_thinking=use_thinking,
            strategy=strategy
        )
        qa_time = time.time() - qa_start
        print(f"   ✅ Question answering completed! (Time: {qa_time:.2f}s)")

        # Calculate total time
        total_time = time.time() - start_time

        # Display results
        print("\n" + "="*60)
        print("📋 RESULTS")
        print("="*60)
        print(f"Question: {question}")
        print(f"Mode: {mode.upper()}")
        print(f"Thinking: {'Enabled' if use_thinking else 'Disabled'}")
        print(f"Output: {output_path if output_path else 'Not saved'}")

        # Display timing summary
        print("\n" + "="*60)
        print("⏱️ TIMING SUMMARY")
        print("="*60)
        if pipeline_ran:
            print(f"📊 Schema Induction Pipeline: {pipeline_time:.2f}s")
            if chunk_processing_time > 0:
                print(f"   ├─ Chunk Processing: {chunk_processing_time:.2f}s")
            if code_generation_time > 0:
                print(f"   └─ Code Generation: {code_generation_time:.2f}s")
        print(f"📊 Question Answering: {qa_time:.2f}s")
        print(f"📊 Total Time: {total_time:.2f}s")

        # Share of the total spent in each phase (only meaningful if the pipeline ran)
        if pipeline_ran and total_time > 0:
            pipeline_percent = (pipeline_time / total_time) * 100
            qa_percent = (qa_time / total_time) * 100
            print(f"📊 Pipeline: {pipeline_percent:.1f}% | QA: {qa_percent:.1f}%")

        print("\n" + "="*60)
        print("ANSWER:")
        print("="*60)
        print(answer)
        print("="*60)

        return answer

    except Exception as e:
        # Top-level boundary for the CLI: report the failure and signal it with None
        print(f"❌ Error during question answering: {e}")
        import traceback
        traceback.print_exc()
        return None

async def main():
    """Command-line entry point: parse arguments and dispatch.

    With ``--interactive`` the interactive prompt flow is started and nothing
    else is read from the command line. Otherwise both ``--question`` and
    ``--mode`` must be given; if either is missing, a message and the help
    text are printed and the function returns without running anything.
    """
    usage_examples = """
Examples:
  # Standard QA (question only)
  python qa_main.py --question "How to improve productivity?" --mode standard
  
  # Enhanced QA (reruns pipeline + topological graph)
  python qa_main.py --question "How to improve productivity?" --mode enhanced
  
  # With thinking mode (shows step-by-step reasoning)
  python qa_main.py --question "How to improve productivity?" --mode enhanced --thinking on
  
  # Without thinking mode (direct answer)
  python qa_main.py --question "How to improve productivity?" --mode standard --thinking off
  
  # Interactive mode
  python qa_main.py --interactive
  
  # Save answer to file
  python qa_main.py --question "How to improve productivity?" --mode enhanced --output answer.json
        """
    cli = argparse.ArgumentParser(
        description="Question Answering with Topological Graph Integration",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # Registration order is kept as-is because it determines the --help layout
    cli.add_argument("--question", help="Question to answer")
    cli.add_argument(
        "--mode",
        choices=["standard", "enhanced"],
        help="Answering mode (standard or enhanced)",
    )
    cli.add_argument(
        "--thinking",
        choices=["on", "off"],
        default="off",
        help="Enable/disable thinking mode (shows step-by-step reasoning)",
    )
    cli.add_argument(
        "--strategy",
        choices=["fixed", "dynamic"],
        default="dynamic",
        help="Chunk strategy for enhanced mode (fixed: top-k chunks, dynamic: fit to context)",
    )
    cli.add_argument(
        "--corpus-strategy",
        choices=["strategy_1", "strategy_2", "strategy_3"],
        default="strategy_1",
        help="Corpus generation strategy for schema induction pipeline",
    )
    cli.add_argument("--output", help="Output file path for saving answer")
    cli.add_argument(
        "--interactive",
        action="store_true",
        help="Run in interactive mode",
    )
    cli.add_argument(
        "--model",
        choices=["32B", "30B-A3B"],
        default="32B",
        help="Model to use for processing (default: 32B)",
    )

    opts = cli.parse_args()

    # Interactive mode takes precedence over every other option
    if opts.interactive:
        interactive_mode()
        return

    # One-shot mode: report the first required option that is missing
    required = (
        (opts.question, "❌ Question is required. Use --question or --interactive"),
        (opts.mode, "❌ Mode is required. Use --mode standard or --mode enhanced"),
    )
    for supplied, complaint in required:
        if not supplied:
            print(complaint)
            cli.print_help()
            return

    await run_qa(
        opts.question,
        opts.mode,
        output_path=opts.output,
        use_thinking=(opts.thinking == "on"),
        strategy=opts.strategy,
        corpus_strategy=opts.corpus_strategy,
        model=opts.model,
    )

# Script entry point: only when executed directly, run the async CLI to completion
if __name__ == "__main__":
    asyncio.run(main())