import sys
from pathlib import Path

# Absolute path of this script file.
FILE_PATH = Path(__file__).absolute()
# Three directory levels above this file; presumably the repository root,
# given the usage examples run this script from tasks/workflow/ — verify if
# the file is moved.
BASE_DIR = FILE_PATH.parent.parent.parent
# Put BASE_DIR at the front of the import search path. This must run before
# the `src.*` imports that follow so they can be resolved from BASE_DIR.
sys.path.insert(0, str(BASE_DIR))

from src.configs.logger import get_logger
from src.models.generator.evaluation_driven_outlines_generator import EvaluationDrivenOutlinesGenerator
from src.modules.preprocessor.utils import parse_arguments_for_integration_test

# Module-level logger for this script, obtained from the project's
# get_logger helper under a dotted name mirroring the script's location.
logger = get_logger("tasks.workflow.03_gen_outlines_with_evaluation")


def parse_evaluation_arguments(argv=None):
    """Parse command line arguments for evaluation-driven outline generation.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the arguments from ``sys.argv[1:]``,
            which is the behavior existing callers rely on.

    Returns:
        argparse.Namespace: Parsed options with the attributes ``task_id``,
        ``evaluation_mode``, ``max_rounds``, ``score_threshold``,
        ``export_conversations`` and ``rollback_on_degradation``.

    Raises:
        SystemExit: Raised by argparse when the arguments are invalid (for
            example ``--task_id`` is missing) or when ``--help`` is requested.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Evaluation-Driven Outline Generation with Supervisor LLM",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run with LLM automatic evaluation (3 rounds max for each stage)
  python tasks/workflow/03_gen_outlines_with_evaluation.py --task_id your_task_id --evaluation_mode auto --max_rounds 3
  
  # Run with interactive LLM + user evaluation
  python tasks/workflow/03_gen_outlines_with_evaluation.py --task_id your_task_id --evaluation_mode interactive --max_rounds 5
  
  # Run without evaluation (traditional mode)
  python tasks/workflow/03_gen_outlines_with_evaluation.py --task_id your_task_id --evaluation_mode none
        """
    )

    parser.add_argument(
        "--task_id",
        type=str,
        required=True,
        help="Task ID for the survey generation"
    )

    parser.add_argument(
        "--evaluation_mode",
        type=str,
        choices=["none", "auto", "interactive"],
        default="auto",
        help="Evaluation mode: none (no evaluation), auto (LLM automatic), interactive (LLM + user)"
    )

    parser.add_argument(
        "--max_rounds",
        type=int,
        default=3,
        help="Maximum number of evaluation rounds for each stage (default: 3)"
    )

    parser.add_argument(
        "--score_threshold",
        type=float,
        default=3.5,
        help="Minimum average score to accept outline without further refinement (default: 3.5)"
    )

    parser.add_argument(
        "--export_conversations",
        action="store_true",
        help="Export detailed conversation logs and modification history"
    )

    # Rollback is enabled by default. A store_true flag whose default is
    # already True can never yield False, so the positive flag is kept for
    # backward compatibility and paired with a negative flag that writes to
    # the same destination, giving users a way to switch rollback off.
    parser.add_argument(
        "--rollback_on_degradation",
        dest="rollback_on_degradation",
        action="store_true",
        default=True,
        help="Enable rollback when evaluation scores decrease"
    )
    parser.add_argument(
        "--no_rollback_on_degradation",
        dest="rollback_on_degradation",
        action="store_false",
        help="Disable rollback when evaluation scores decrease (rollback is enabled by default)"
    )

    return parser.parse_args(argv)


if __name__ == "__main__":
    # Script entry point: parse the CLI options, run the evaluation-driven
    # outline generator, and translate the outcome into a process exit code
    # (0 on success or user interrupt, 1 on failure or unexpected error).
    try:
        args = parse_evaluation_arguments()

        logger.info(f"Starting evaluation-driven outline generation for task: {args.task_id}")
        logger.info(f"Evaluation mode: {args.evaluation_mode}")
        logger.info(f"Max rounds: {args.max_rounds}")
        logger.info(f"Score threshold: {args.score_threshold}")

        # Initialize the evaluation-driven outline generator
        generator = EvaluationDrivenOutlinesGenerator(
            task_id=args.task_id,
            evaluation_mode=args.evaluation_mode,
            max_evaluation_rounds=args.max_rounds,
            score_threshold=args.score_threshold,
            enable_rollback=args.rollback_on_degradation,
            export_conversations=args.export_conversations
        )

        # A falsy result from run() is treated as failure. sys.exit raises
        # SystemExit, which is not an Exception subclass, so it passes
        # through the handlers below untouched.
        if not generator.run():
            logger.error("Evaluation-driven outline generation failed")
            sys.exit(1)

        logger.info("Evaluation-driven outline generation completed successfully!")

        # Display summary
        generator.display_final_summary()

        if args.export_conversations:
            logger.info("Conversation logs exported to output directory")

    except KeyboardInterrupt:
        # Ctrl-C is treated as a deliberate stop rather than an error.
        logger.info("Process interrupted by user")
        sys.exit(0)
    except Exception as e:
        # Log at ERROR level with the traceback attached; the message alone
        # is rarely enough to locate a failure inside the generator.
        # NOTE(review): assumes get_logger returns a logger exposing
        # .exception() (true for the standard logging.Logger) — confirm.
        logger.exception(f"Unexpected error: {e}")
        sys.exit(1)