#!/usr/bin/env python3
"""
Ground Truth Annotation Tool - FastAPI Application
Web interface for creating ground truth annotations for dashcam videos
"""

from fastapi import FastAPI, HTTPException, Request, Depends
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn
from pathlib import Path
import logging
from typing import Optional, Dict, Any, List
import json
import asyncio
import tempfile
import shutil
try:
    import aiofiles
except ImportError:
    # Fallback for systems without aiofiles
    aiofiles = None

# Module imports (run as: python -m evaluation.make_dataset.s4_annotate_ground_truth.app)
from .core.config import Settings
from .core.video_manager import VideoManager
from .managers.session_manager import SessionManager
from .managers.progress_tracker import ProgressTracker
from .models import SessionStatusResponse, VideoClip
from .pipeline.annotation_generator import AnnotationGenerator
from .pipeline.scene_extractor import SceneExtractor
from .pipeline.violation_analyzer import ViolationAnalyzer
from .pipeline.accident_analyzer import AccidentAnalyzer
from .pipeline.assessment_generator import AssessmentGenerator

# Module-level singletons shared by every request handler in this file.
# The video manager is rooted at the directory configured in Settings.
settings = Settings()
video_manager = VideoManager(settings.video_directory)
session_manager = SessionManager()
progress_tracker = ProgressTracker()

# Pipeline components, one per annotation step (1: annotation, 2: scenes,
# 3: violations, 4: accidents, 5: assessment). They are created once at
# import time and reused by the step endpoints.
annotation_generator = AnnotationGenerator()
scene_extractor = SceneExtractor()
violation_analyzer = ViolationAnalyzer()
accident_analyzer = AccidentAnalyzer()
assessment_generator = AssessmentGenerator()

# The ASGI application object; all routes in this module are registered on it.
app = FastAPI(
    title="Ground Truth Annotation Tool",
    description="Web interface for creating ground truth annotations for dashcam videos",
    version="1.0.0"
)

# CORS is fully open (any origin, method and header, with credentials) so the
# frontend can be served from a different origin during development.
# NOTE(review): restrict allow_origins before exposing this service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Custom StaticFiles class to disable caching during development
class NoCacheStaticFiles(StaticFiles):
    """
    StaticFiles variant for development that defeats browser caching.

    Conditional requests are never answered with 304, and every file
    response carries no-cache headers, so edited JavaScript/CSS is picked
    up on the next reload.
    """

    def is_not_modified(self, response_headers, request_headers) -> bool:
        """Report every file as modified so a full response is always sent."""
        return False

    def file_response(self, *args, **kwargs):
        """Build the normal file response, then add no-cache headers.

        Headers already present on the response are left untouched
        (``setdefault`` only fills in missing ones).
        """
        response = super().file_response(*args, **kwargs)
        no_cache_headers = (
            ("Cache-Control", "max-age=0, no-cache, no-store, must-revalidate"),
            ("Pragma", "no-cache"),
            ("Expires", "0"),
        )
        for header_name, header_value in no_cache_headers:
            response.headers.setdefault(header_name, header_value)
        return response

# Static assets live in the "static" directory next to this file. The mount is
# skipped when the directory is missing so the API can still start without the
# frontend; root() reads index.html from the same directory.
static_dir = Path(__file__).parent / "static"
if static_dir.exists():
    app.mount("/static", NoCacheStaticFiles(directory=str(static_dir)), name="static")

# Configure root logging at INFO level and create the module logger used by
# the helpers and endpoints below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Request models for API endpoints
class GenerateAnnotationRequest(BaseModel):
    # Request body for POST /api/clips/{video_id}/step/1/generate.
    # Both fields are forwarded unchanged to
    # annotation_generator.generate_annotation().
    optimize: bool = False  # passed through as the generator's `optimize` argument
    user_feedback: Optional[str] = None  # optional; passed through as `user_feedback`

class OptimizeAnnotationRequest(BaseModel):
    # Request body shared by the step 1 and step 5 "optimize" endpoints.
    user_instructions: str  # required; forwarded to the optimizer as `user_instructions`

class SaveStepDataRequest(BaseModel):
    # Request body for PUT /api/clips/{video_id}/step/{step}.
    # save_step_data() stores data[<step key>] when that key is present,
    # otherwise it stores the whole mapping.
    data: Dict[str, Any]

class NavigateRequest(BaseModel):
    # Request body for POST /api/session/navigate; both values are handed to
    # session_manager.set_position().
    video_id: str  # clip to make current
    step: int  # step number within that clip (not range-checked here)

# JSON validation and repair utilities
def validate_json_structure(data: Dict[str, Any]) -> bool:
    """Check that ground truth data has the minimal expected shape.

    The payload must be a dict, and if it carries a ``'ground_truth'``
    entry that entry must be a dict as well. No other keys are inspected.

    Returns:
        True when the shape is acceptable, False otherwise (including when
        the check itself raises).
    """
    try:
        if not isinstance(data, dict):
            return False

        # A missing 'ground_truth' key is fine; a present one must be a mapping.
        return 'ground_truth' not in data or isinstance(data['ground_truth'], dict)
    except Exception as e:
        logger.warning(f"JSON structure validation failed: {e}")
        return False

def repair_json_file(file_path: Path) -> Optional[Dict[str, Any]]:
    """Attempt to recover the JSON object at the start of a corrupted file.

    The typical corruption is extra content (for example duplicated closing
    braces) appended after an otherwise complete document. Instead of
    guessing the end of the document from line contents, the first complete
    JSON value is decoded with ``JSONDecoder.raw_decode`` and anything after
    it is discarded. The file itself is not modified.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The recovered top-level object, or None when the file cannot be read,
        does not start with a complete JSON value, or that value is not an
        object.
    """
    try:
        content = file_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as e:
        logger.error(f"Failed to repair JSON file {file_path}: {e}")
        return None

    # raw_decode() requires the value to start at index 0, so drop a leading
    # byte-order mark and whitespace first.
    candidate = content.lstrip('\ufeff').lstrip()

    try:
        data, end = json.JSONDecoder().raw_decode(candidate)
    except (json.JSONDecodeError, RecursionError) as e:
        logger.warning(f"Could not repair JSON file {file_path}: {e}")
        return None

    if not isinstance(data, dict):
        # Ground truth documents are always objects; anything else is unusable.
        logger.warning(
            f"Could not repair JSON file {file_path}: top-level value is not an object"
        )
        return None

    trailing = candidate[end:].strip()
    if trailing:
        logger.warning(f"Removing extra content after JSON: {trailing[:50]}...")

    logger.info(f"Successfully repaired JSON file: {file_path}")
    return data

async def safe_save_json(file_path: Path, data: Dict[str, Any]) -> bool:
    """Validate and write JSON data so readers never see a partial file.

    The content is written to a temporary file in the destination directory
    and then renamed over the target with ``Path.replace``. That rename
    overwrites an existing destination atomically, whereas ``shutil.move``
    can fall back to copy-and-delete when a plain rename fails.

    Args:
        file_path: Destination file; parent directories are created if needed.
        data: Ground truth payload; must pass ``validate_json_structure``.

    Returns:
        True on success, False on any validation, serialization or I/O
        failure (the failure is logged, never raised).
    """
    try:
        # Validate data structure first
        if not validate_json_structure(data):
            logger.error(f"Invalid JSON structure for {file_path}")
            return False

        # Ensure parent directory exists
        file_path.parent.mkdir(parents=True, exist_ok=True)

        # Serialize up front so a serialization error never touches the disk
        content = json.dumps(data, indent=2, ensure_ascii=False)

        # Sanity check: what we are about to write must parse back
        try:
            json.loads(content)
        except json.JSONDecodeError as e:
            logger.error(f"Generated invalid JSON content: {e}")
            return False

        temp_file: Optional[Path] = None
        try:
            # Same directory as the target, so the rename stays on one filesystem
            with tempfile.NamedTemporaryFile(
                mode='w',
                suffix='.json.tmp',
                dir=file_path.parent,
                delete=False,
                encoding='utf-8'
            ) as f:
                temp_file = Path(f.name)
                f.write(content)
                f.flush()

            # Atomic rename over the final location
            temp_file.replace(file_path)
            logger.debug(f"Successfully saved JSON file: {file_path}")
            return True

        except Exception:
            # Do not leave a stray temp file behind when the write or rename fails
            if temp_file is not None and temp_file.exists():
                temp_file.unlink()
            raise

    except Exception as e:
        logger.error(f"Failed to save JSON file {file_path}: {e}")
        return False

# Utility functions
async def load_ground_truth(video_id: str) -> Dict[str, Any]:
    """Load ground truth data for a video, repairing corrupted files if possible.

    Looks for ``<video_id>.json`` first and falls back to the legacy
    ``<video_id>_ground_truth.json`` name. If the file does not parse, a
    repair is attempted and, when it succeeds, the repaired content is
    written back to the same file.

    Args:
        video_id: Identifier of the clip whose ground truth is wanted.

    Returns:
        The stored object, or an empty dict when no file exists, the file
        cannot be read or repaired, or its top-level value is not an object.
    """
    # Check both filename formats for compatibility
    gt_file = settings.ground_truth_directory / f"{video_id}.json"
    old_format = settings.ground_truth_directory / f"{video_id}_ground_truth.json"

    # Try new format first, then old format
    if gt_file.exists():
        file_to_load = gt_file
    elif old_format.exists():
        file_to_load = old_format
    else:
        return {}

    try:
        if aiofiles:
            async with aiofiles.open(file_to_load, 'r', encoding='utf-8') as f:
                content = await f.read()
        else:
            # Fallback: run the blocking read in the default thread pool
            loop = asyncio.get_running_loop()
            content = await loop.run_in_executor(None, file_to_load.read_text, 'utf-8')
        data = json.loads(content)

    except json.JSONDecodeError as e:
        logger.warning(f"JSON decode error in {file_to_load}: {e}")
        logger.info(f"Attempting to repair corrupted file: {file_to_load}")

        # The repair does blocking file I/O, so keep it off the event loop
        loop = asyncio.get_running_loop()
        repaired_data = await loop.run_in_executor(None, repair_json_file, file_to_load)

        if repaired_data:
            # Persist the repaired version so the next load parses cleanly
            success = await safe_save_json(file_to_load, repaired_data)
            if success:
                logger.info(f"Successfully repaired and saved: {file_to_load}")
                return repaired_data
            else:
                logger.error(f"Failed to save repaired file: {file_to_load}")

        # If repair failed, return empty dict
        logger.error(f"Could not repair file {file_to_load}, returning empty data")
        return {}

    except Exception as e:
        logger.error(f"Unexpected error loading {file_to_load}: {e}")
        return {}

    # Valid JSON that is not an object (e.g. null or a list) would break every
    # caller, which all treat the result as a dict.
    if not isinstance(data, dict):
        logger.error(f"Unexpected error loading {file_to_load}: top-level JSON value is not an object")
        return {}

    return data

async def save_ground_truth(video_id: str, data: Dict[str, Any]) -> None:
    """Persist ground truth data for a video as ``<video_id>.json``.

    When ``data`` has no ``'evaluation_criteria'`` entry, the default
    criteria are added to it in place before saving.

    Raises:
        RuntimeError: If the underlying safe save reports failure.
    """
    target = settings.ground_truth_directory / f"{video_id}.json"

    # Only fills the key when absent; existing criteria are kept as they are.
    data.setdefault('evaluation_criteria', {
        "annotation_quality": "How accurate is the scene description?",
        "scene_extraction": "Are all important scenes identified?",
        "violation_detection": "Are traffic violations correctly identified?",
        "accident_assessment": "Are accident risks properly evaluated?",
        "safety_scoring": "Is the safety score appropriate?",
        "advice_relevance": "Are recommendations actionable and relevant?"
    })

    saved = await safe_save_json(target, data)
    if saved:
        return
    raise RuntimeError(f"Failed to save ground truth data for {video_id}")

async def get_video_info(video_id: str) -> Optional[VideoClip]:
    """Return the first clip whose ``video_id`` matches, or None if there is none."""
    available = await video_manager.get_clips()
    return next(
        (candidate for candidate in available if candidate.video_id == video_id),
        None,
    )

# Root route - serve the main interface
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the main application interface.

    Returns the contents of ``static/index.html`` when it exists, otherwise
    a small placeholder page. The file is always decoded as UTF-8, whichever
    read path is used.
    """
    index_file = static_dir / "index.html"
    if not index_file.exists():
        return HTMLResponse("<h1>Ground Truth Annotation Tool</h1><p>Interface not found</p>")

    if aiofiles:
        # Explicit encoding: the platform default is not necessarily UTF-8
        async with aiofiles.open(index_file, 'r', encoding='utf-8') as f:
            content = await f.read()
    else:
        # Fallback: run the blocking read in the default thread pool
        loop = asyncio.get_running_loop()
        content = await loop.run_in_executor(None, index_file.read_text, 'utf-8')

    return HTMLResponse(content=content)

# Health check endpoint
@app.get("/api/health")
async def health_check():
    """Report that the service is up, with its name and version."""
    payload = dict(
        status="healthy",
        service="ground-truth-annotation",
        version="1.0.0",
    )
    return payload

# Session management endpoints
@app.get("/api/session/current")
async def get_current_session() -> SessionStatusResponse:
    """Get current session information.

    Returns the session state together with the current clip and its
    progress (both None when the session has no current video).

    Raises:
        HTTPException: 500 when there is no active session or when any
            lookup fails.
    """
    try:
        session = session_manager.get_session_state()
        if not session:
            # No session is created here; a missing session is reported as an error
            raise HTTPException(status_code=500, detail="No active session found")

        clips = await video_manager.get_clips()
        current_clip = None
        current_progress = None

        if session.current_video_id:
            current_clip = next((clip for clip in clips if clip.video_id == session.current_video_id), None)
            current_progress = progress_tracker.load_clip_progress(session.current_video_id)

        # Rebuild the state as this package's SessionState so it validates
        # against SessionStatusResponse. Package-relative import, matching the
        # other imports of this module (it is run with ``python -m``).
        from .models import SessionState
        compatible_session = SessionState(**session.model_dump())

        return SessionStatusResponse(
            session=compatible_session,
            current_clip=current_clip,
            current_progress=current_progress
        )
    except HTTPException as e:
        # Intentional HTTP errors pass through unchanged instead of being
        # re-wrapped with a stringified detail.
        logger.error(f"Failed to get current session: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to get current session: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/session/navigate")
async def navigate_session(request: NavigateRequest):
    """Move the session to the requested clip and step.

    Failures are reported in the response body rather than as an HTTP error.
    """
    try:
        session_manager.set_position(request.video_id, request.step)
    except Exception as e:
        logger.error(f"Failed to navigate session: {e}")
        return {"success": False, "error": str(e)}
    return {"success": True}

@app.get("/api/session/statistics")
async def get_session_statistics():
    """Return the session summary exported by the session manager.

    Failures are reported in the response body rather than as an HTTP error.
    """
    try:
        summary = session_manager.export_session_summary()
    except Exception as e:
        logger.error(f"Failed to get session statistics: {e}")
        return {"success": False, "error": str(e)}
    return {"success": True, "data": summary}

@app.get("/api/session/next-unprocessed")
async def get_next_unprocessed_clip():
    """Find the next unprocessed clip and make it the session's current clip.

    On success the session is navigated to step 1 of that clip. When no
    clip is left, or on any failure, ``success`` is False with an ``error``.
    """
    try:
        all_clips = await video_manager.get_clips()
        candidate_ids = [item.video_id for item in all_clips]

        upcoming_id = await session_manager.get_next_unprocessed_clip(candidate_ids, progress_tracker)
        if not upcoming_id:
            return {"success": False, "error": "No unprocessed clips found"}

        # Point the session at the first step of the selected clip
        await session_manager.navigate_to(upcoming_id, 1)
        return {"success": True, "video_id": upcoming_id}
    except Exception as e:
        logger.error(f"Failed to get next unprocessed clip: {e}")
        return {"success": False, "error": str(e)}

@app.get("/api/session/clip-status")
async def get_clips_by_status():
    """Return clips split into processed and unprocessed, with counts.

    Failures are reported in the response body rather than as an HTTP error.
    """
    try:
        all_clips = await video_manager.get_clips()
        known_ids = [item.video_id for item in all_clips]

        done = await session_manager.get_processed_clips(known_ids, progress_tracker)
        pending = await session_manager.get_unprocessed_clips(known_ids, progress_tracker)

        summary = {
            "processed": done,
            "unprocessed": pending,
            "total": len(known_ids),
            "processed_count": len(done),
            "unprocessed_count": len(pending)
        }
        return {"success": True, "data": summary}
    except Exception as e:
        logger.error(f"Failed to get clips by status: {e}")
        return {"success": False, "error": str(e)}

# Clips management endpoints
@app.get("/api/clips")
async def get_clips():
    """List every available video clip as a plain dict.

    Failures are reported in the response body rather than as an HTTP error.
    """
    try:
        available = await video_manager.get_clips()
        serialized = []
        for item in available:
            serialized.append(item.dict())
        return {"success": True, "clips": serialized}
    except Exception as e:
        logger.error(f"Failed to get clips: {e}")
        return {"success": False, "error": str(e)}

@app.get("/api/clips/{video_id}")
async def get_clip_info(video_id: str):
    """Return the stored information for one clip.

    Raises:
        HTTPException: 404 when no clip has the given id.
    """
    try:
        found = await get_video_info(video_id)
        if not found:
            raise HTTPException(status_code=404, detail="Clip not found")
        return {"success": True, "data": found.dict()}
    except HTTPException as e:
        # Logged like any other failure, then passed through untouched
        logger.error(f"Failed to get clip info: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to get clip info: {e}")
        return {"success": False, "error": str(e)}

@app.get("/api/clips/{video_id}/progress")
async def get_clip_progress(video_id: str):
    """Return the tracked progress for one clip, or None when there is none.

    Failures are reported in the response body rather than as an HTTP error.
    """
    try:
        tracked = progress_tracker.load_clip_progress(video_id)
        if tracked:
            payload = tracked.dict()
        else:
            payload = None
        return {"success": True, "data": payload}
    except Exception as e:
        logger.error(f"Failed to get clip progress: {e}")
        return {"success": False, "error": str(e)}

@app.get("/api/clips/{video_id}/ground-truth")
async def get_ground_truth(video_id: str):
    """Return the ground truth stored for one clip.

    The response always wraps the payload under ``"ground_truth"``: the
    nested object when the file has one, otherwise the whole stored data.
    """
    try:
        stored = await load_ground_truth(video_id)
        # Nested layout exposes its inner object; flat layout is returned whole
        payload = stored['ground_truth'] if 'ground_truth' in stored else stored
        return {"success": True, "data": {"ground_truth": payload}}
    except Exception as e:
        logger.error(f"Failed to get ground truth: {e}")
        return {"success": False, "error": str(e)}

@app.get("/api/clips/{video_id}/stream")
async def stream_video(video_id: str):
    """Serve the video file of a clip as ``video/mp4``.

    Raises:
        HTTPException: 404 when the clip or its file is missing, 500 on any
            other failure.
    """
    try:
        found = await get_video_info(video_id)
        if not found:
            raise HTTPException(status_code=404, detail="Clip not found")

        source_file = Path(found.video_path)
        if not source_file.exists():
            raise HTTPException(status_code=404, detail="Video file not found")

        return FileResponse(
            source_file,
            media_type="video/mp4",
            filename=f"{video_id}.mp4"
        )
    except HTTPException as e:
        # Logged like any other failure, then passed through untouched
        logger.error(f"Failed to stream video: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to stream video: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# Step 1: Annotation generation endpoints
@app.post("/api/clips/{video_id}/step/1/generate")
async def generate_annotation(video_id: str, request: GenerateAnnotationRequest):
    """Generate an annotation for a clip, store it and complete step 1.

    Raises:
        HTTPException: 404 when no clip has the given id. Other failures are
            reported in the response body.
    """
    try:
        found = await get_video_info(video_id)
        if not found:
            raise HTTPException(status_code=404, detail="Clip not found")

        # Run the annotation pipeline on the clip's video file
        annotation = await annotation_generator.generate_annotation(
            video_path=found.video_path,
            optimize=request.optimize,
            user_feedback=request.user_feedback
        )

        # Merge into the stored document, creating the nested layout if needed
        document = await load_ground_truth(video_id)
        document.setdefault('ground_truth', {})['annotation'] = annotation

        # Fill in video metadata only where it is missing
        document.setdefault('video_id', video_id)
        document.setdefault('video_path', found.video_path)

        await save_ground_truth(video_id, document)

        # Step 1 counts as done once the annotation is persisted
        progress_tracker.complete_step(video_id, 1)

        return {"success": True, "data": {"annotation": annotation}}

    except HTTPException as e:
        logger.error(f"Failed to generate annotation: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to generate annotation: {e}")
        return {"success": False, "error": str(e)}

@app.post("/api/clips/{video_id}/step/1/optimize")
async def optimize_annotation(video_id: str, request: OptimizeAnnotationRequest):
    """Rewrite the stored annotation according to the user's instructions.

    Raises:
        HTTPException: 404 when no clip has the given id, 400 when there is
            no annotation yet. Other failures are reported in the response
            body.
    """
    try:
        found = await get_video_info(video_id)
        if not found:
            raise HTTPException(status_code=404, detail="Clip not found")

        document = await load_ground_truth(video_id)

        # The annotation lives under 'ground_truth' in the nested layout and
        # at the top level in the flat one
        holder = document['ground_truth'] if 'ground_truth' in document else document
        current_annotation = holder.get('annotation', '')

        if not current_annotation:
            raise HTTPException(status_code=400, detail="No annotation to optimize")

        optimized_annotation = await annotation_generator.optimize_annotation(
            video_path=found.video_path,
            current_annotation=current_annotation,
            user_instructions=request.user_instructions
        )

        # Always write back in the nested layout
        document.setdefault('ground_truth', {})['annotation'] = optimized_annotation

        # Fill in video metadata only where it is missing
        document.setdefault('video_id', video_id)
        document.setdefault('video_path', found.video_path)

        await save_ground_truth(video_id, document)

        return {"success": True, "data": {"annotation": optimized_annotation}}

    except HTTPException as e:
        logger.error(f"Failed to optimize annotation: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to optimize annotation: {e}")
        return {"success": False, "error": str(e)}

# Step 2: Scene extraction endpoints
@app.post("/api/clips/{video_id}/step/2/extract")
async def extract_scenes(video_id: str):
    """Extract scenes from the stored annotation and complete step 2.

    Raises:
        HTTPException: 404 when no clip has the given id, 400 when there is
            no annotation yet (including when no ground truth has been saved
            for the clip at all). Other failures are reported in the
            response body.
    """
    try:
        clip = await get_video_info(video_id)
        if not clip:
            raise HTTPException(status_code=404, detail="Clip not found")

        # A clip with no saved ground truth loads as {}, so the nested section
        # may be absent; treat that the same as "no annotation".
        gt_data = await load_ground_truth(video_id)
        ground_truth = gt_data.get('ground_truth') or {}
        annotation = ground_truth.get('annotation', '')

        if not annotation:
            raise HTTPException(status_code=400, detail="No annotation available for scene extraction")

        # Extract scenes
        scenes = await scene_extractor.extract_scenes(
            video_path=clip.video_path,
            annotation=annotation
        )

        # A non-empty annotation means ground_truth is the dict stored in
        # gt_data, so this update is part of what gets saved.
        ground_truth['scenes'] = scenes
        await save_ground_truth(video_id, gt_data)

        # Update progress
        progress_tracker.complete_step(video_id, 2)

        return {
            "success": True,
            "data": {"scenes": scenes}
        }

    except HTTPException as e:
        logger.error(f"Failed to extract scenes: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to extract scenes: {e}")
        return {"success": False, "error": str(e)}

# Step 3: Violation analysis endpoints
@app.post("/api/clips/{video_id}/step/3/analyze")
async def analyze_violations(video_id: str):
    """Analyze traffic violations for the stored scenes and complete step 3.

    Raises:
        HTTPException: 404 when no clip has the given id, 400 when there are
            no scenes yet (including when no ground truth has been saved for
            the clip at all). Other failures are reported in the response
            body.
    """
    try:
        clip = await get_video_info(video_id)
        if not clip:
            raise HTTPException(status_code=404, detail="Clip not found")

        # A clip with no saved ground truth loads as {}, so the nested section
        # may be absent; treat that the same as "no scenes".
        gt_data = await load_ground_truth(video_id)
        ground_truth = gt_data.get('ground_truth') or {}
        scenes_data = ground_truth.get('scenes', [])

        if not scenes_data:
            raise HTTPException(status_code=400, detail="No scenes available for violation analysis")

        # Analyze violations
        violations = await violation_analyzer.analyze_violations(
            video_path=clip.video_path,
            scenes=scenes_data
        )

        # Format violations for JSON output
        formatted_violations = violation_analyzer.format_violations_for_json(scenes_data, violations)

        # Non-empty scenes mean ground_truth is the dict stored in gt_data,
        # so this update is part of what gets saved.
        ground_truth['violations'] = formatted_violations
        await save_ground_truth(video_id, gt_data)

        # Update progress
        progress_tracker.complete_step(video_id, 3)

        return {
            "success": True,
            "data": {"violations": formatted_violations}
        }

    except HTTPException as e:
        logger.error(f"Failed to analyze violations: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to analyze violations: {e}")
        return {"success": False, "error": str(e)}

# Step 4: Accident analysis endpoints
@app.post("/api/clips/{video_id}/step/4/analyze")
async def analyze_accidents(video_id: str):
    """Analyze accident risks for the stored scenes and complete step 4.

    Raises:
        HTTPException: 404 when no clip has the given id, 400 when there are
            no scenes yet (including when no ground truth has been saved for
            the clip at all). Other failures are reported in the response
            body.
    """
    try:
        clip = await get_video_info(video_id)
        if not clip:
            raise HTTPException(status_code=404, detail="Clip not found")

        # A clip with no saved ground truth loads as {}, so the nested section
        # may be absent; treat that the same as "no scenes".
        gt_data = await load_ground_truth(video_id)
        ground_truth = gt_data.get('ground_truth') or {}
        scenes_data = ground_truth.get('scenes', [])

        if not scenes_data:
            raise HTTPException(status_code=400, detail="No scenes available for accident analysis")

        # Analyze accidents
        accidents = await accident_analyzer.analyze_accidents(
            video_path=clip.video_path,
            scenes=scenes_data
        )

        # Format accidents for JSON output
        formatted_accidents = accident_analyzer.format_accidents_for_json(scenes_data, accidents)

        # Non-empty scenes mean ground_truth is the dict stored in gt_data,
        # so this update is part of what gets saved.
        ground_truth['accidents'] = formatted_accidents
        await save_ground_truth(video_id, gt_data)

        # Update progress
        progress_tracker.complete_step(video_id, 4)

        return {
            "success": True,
            "data": {"accidents": formatted_accidents}
        }

    except HTTPException as e:
        logger.error(f"Failed to analyze accidents: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to analyze accidents: {e}")
        return {"success": False, "error": str(e)}

# Step 5: Assessment generation endpoints
@app.post("/api/clips/{video_id}/step/5/generate")
async def generate_assessment(video_id: str):
    """Generate the final driving assessment and complete step 5.

    Uses the stored annotation, scenes, violations and accidents. Only the
    annotation is required; the other inputs default to empty lists.

    Raises:
        HTTPException: 404 when no clip has the given id, 400 when there is
            no annotation yet (including when no ground truth has been saved
            for the clip at all). Other failures are reported in the
            response body.
    """
    try:
        clip = await get_video_info(video_id)
        if not clip:
            raise HTTPException(status_code=404, detail="Clip not found")

        # A clip with no saved ground truth loads as {}, so the nested section
        # may be absent; treat that the same as "no annotation".
        gt_data = await load_ground_truth(video_id)
        ground_truth = gt_data.get('ground_truth') or {}
        annotation = ground_truth.get('annotation', '')
        scenes_data = ground_truth.get('scenes', [])
        violations_data = ground_truth.get('violations', [])
        accidents_data = ground_truth.get('accidents', [])

        if not annotation:
            raise HTTPException(status_code=400, detail="No annotation available for assessment")

        # Generate assessment
        assessment = await assessment_generator.generate_assessment(
            video_path=clip.video_path,
            annotation=annotation,
            scenes=scenes_data,
            violations=violations_data,
            accidents=accidents_data
        )

        # The generator is expected to return a dictionary
        if isinstance(assessment, dict):
            assessment_dict = assessment
        else:
            # Fallback for unexpected format: store a neutral placeholder
            logger.warning(f"Unexpected assessment format: {type(assessment)}")
            assessment_dict = {
                'safety_score': 5,
                'risk_level': 'medium', 
                'overall_evaluation': 'Assessment format error',
                'strengths': ['System attempted assessment'],
                'weaknesses': ['Assessment format issue'],
                'improvement_advice': ['Please check system configuration']
            }

        # A non-empty annotation means ground_truth is the dict stored in
        # gt_data, so this update is part of what gets saved.
        ground_truth['assessment'] = assessment_dict
        await save_ground_truth(video_id, gt_data)

        # Update progress
        progress_tracker.complete_step(video_id, 5)

        return {
            "success": True,
            "data": {"assessment": assessment_dict}
        }

    except HTTPException as e:
        logger.error(f"Failed to generate assessment: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to generate assessment: {e}")
        return {"success": False, "error": str(e)}

@app.post("/api/clips/{video_id}/step/5/optimize")
async def optimize_assessment(video_id: str, request: OptimizeAnnotationRequest):
    """Rewrite the stored assessment according to the user's instructions.

    Raises:
        HTTPException: 404 when no clip has the given id, 400 when there is
            no assessment yet (including when no ground truth has been saved
            for the clip at all). Other failures are reported in the
            response body.
    """
    try:
        clip = await get_video_info(video_id)
        if not clip:
            raise HTTPException(status_code=404, detail="Clip not found")

        # A clip with no saved ground truth loads as {}, so the nested section
        # may be absent; treat that the same as "no assessment".
        gt_data = await load_ground_truth(video_id)
        ground_truth = gt_data.get('ground_truth') or {}
        current_assessment = ground_truth.get('assessment', {})

        if not current_assessment:
            raise HTTPException(status_code=400, detail="No assessment to optimize")

        # Optimize assessment
        optimized_assessment = await assessment_generator.optimize_assessment(
            video_path=clip.video_path,
            current_assessment=current_assessment,
            user_instructions=request.user_instructions
        )

        # A non-empty assessment means ground_truth is the dict stored in
        # gt_data, so this update is part of what gets saved.
        ground_truth['assessment'] = optimized_assessment

        # Add video metadata if missing
        if 'video_id' not in gt_data:
            gt_data['video_id'] = video_id
        if 'video_path' not in gt_data:
            gt_data['video_path'] = clip.video_path

        await save_ground_truth(video_id, gt_data)

        return {
            "success": True,
            "data": {"assessment": optimized_assessment}
        }

    except HTTPException as e:
        logger.error(f"Failed to optimize assessment: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to optimize assessment: {e}")
        return {"success": False, "error": str(e)}

# Generic step data saving endpoint
@app.put("/api/clips/{video_id}/step/{step}")
async def save_step_data(video_id: str, step: int, request: SaveStepDataRequest):
    """Store user-supplied data for one step (1-5) and mark it completed.

    The value saved is ``request.data[<step key>]`` when the body contains
    that key, otherwise the whole ``request.data`` mapping.

    Raises:
        HTTPException: 400 for a step outside 1-5, 404 when no clip has the
            given id. Other failures are reported in the response body.
    """
    # Step number -> key under 'ground_truth'
    section_for_step = {
        1: 'annotation',
        2: 'scenes',
        3: 'violations',
        4: 'accidents',
        5: 'assessment'
    }

    try:
        if not 1 <= step <= 5:
            raise HTTPException(status_code=400, detail="Invalid step number")

        found = await get_video_info(video_id)
        if not found:
            raise HTTPException(status_code=404, detail="Clip not found")

        document = await load_ground_truth(video_id)

        # Create the nested layout and video metadata only where missing
        sections = document.setdefault('ground_truth', {})
        document.setdefault('video_id', video_id)
        document.setdefault('video_path', found.video_path)

        # Accept either {"<key>": value} or the bare value mapping
        section = section_for_step[step]
        sections[section] = request.data.get(section, request.data)

        await save_ground_truth(video_id, document)

        # Mark step as completed once the data is persisted
        progress_tracker.complete_step(video_id, step)

        return {"success": True}

    except HTTPException as e:
        logger.error(f"Failed to save step data: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to save step data: {e}")
        return {"success": False, "error": str(e)}

# Export endpoint
@app.post("/api/clips/{video_id}/export")
async def export_ground_truth(video_id: str):
    """Export the clip's complete ground truth data to a JSON file.

    The file is written to ``<ground_truth_directory>/exports`` as
    ``{video_id}_ground_truth_export.json`` and bundles the clip info, the
    stored ground truth and an ``exported_at`` Unix timestamp (seconds).

    Returns:
        ``{"success": True, "data": {"export_file": ..., "download_url": ...}}``
        on success, or ``{"success": False, "error": ...}`` when an
        unexpected error occurs.

    Raises:
        HTTPException: 404 when the clip cannot be found; 400 when there is
            no ground truth data to export.
    """
    # Local import keeps this block self-contained.
    import time

    try:
        clip = await get_video_info(video_id)
        if not clip:
            raise HTTPException(status_code=404, detail="Clip not found")

        # Load complete ground truth data
        gt_data = await load_ground_truth(video_id)

        if not gt_data:
            raise HTTPException(status_code=400, detail="No ground truth data to export")

        # Create export file
        export_dir = settings.ground_truth_directory / "exports"
        export_dir.mkdir(parents=True, exist_ok=True)
        export_file = export_dir / f"{video_id}_ground_truth_export.json"

        export_data = {
            "video_id": video_id,
            "clip_info": clip.dict(),
            "ground_truth": gt_data,
            # Wall-clock time. The event loop's time() is a monotonic clock
            # with an arbitrary reference point, so it cannot serve as an
            # export timestamp.
            "exported_at": time.time()
        }

        content = json.dumps(export_data, indent=2)
        if aiofiles:
            # Same encoding as the fallback branch below.
            async with aiofiles.open(export_file, 'w', encoding='utf-8') as f:
                await f.write(content)
        else:
            # No aiofiles available: do the blocking write in the default
            # executor so the event loop is not stalled.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, export_file.write_text, content, 'utf-8')

        return {
            "success": True,
            "data": {
                "export_file": str(export_file),
                "download_url": f"/api/clips/{video_id}/download-export"
            }
        }

    except HTTPException as e:
        # HTTP errors are logged and then propagated unchanged to FastAPI.
        logger.error(f"Failed to export ground truth: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to export ground truth: {e}")
        return {"success": False, "error": str(e)}

@app.get("/api/clips/{video_id}/download-export")
async def download_export(video_id: str):
    """Serve a previously exported ground truth file as a JSON download.

    Looks for ``{video_id}_ground_truth_export.json`` in the ``exports``
    folder of the ground truth directory and returns it under the download
    name ``{video_id}_ground_truth.json``.

    Raises:
        HTTPException: 404 when the export file does not exist; 500 (with the
            error text as detail) for any other failure.
    """
    try:
        exports_dir = settings.ground_truth_directory / "exports"
        export_path = exports_dir / f"{video_id}_ground_truth_export.json"

        if export_path.exists():
            return FileResponse(
                export_path,
                media_type="application/json",
                filename=f"{video_id}_ground_truth.json"
            )

        raise HTTPException(status_code=404, detail="Export file not found")

    except HTTPException as exc:
        logger.error(f"Failed to download export: {exc}")
        raise
    except Exception as exc:
        logger.error(f"Failed to download export: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

# Clip completion management
@app.post("/api/clips/{video_id}/mark-complete")
async def mark_clip_complete(video_id: str):
    """Mark a clip as completely annotated and refresh session statistics.

    The clip is only marked complete when each of the five step sections
    (annotation, scenes, violations, accidents, assessment) holds non-empty
    data in its stored ground truth. On success all five steps are flagged
    completed, the clip's overall status is set to ``"completed"``, and the
    completed / in-progress / pending counts are recomputed across all clips.

    Returns:
        ``{"success": True, "data": {...}}`` with the clip id, status and
        updated counts; ``{"success": False, "error": ...}`` when step data
        is missing or an unexpected error occurs.

    Raises:
        HTTPException: 404 when the clip cannot be found.
    """
    try:
        # Verify the clip exists
        clip = await get_video_info(video_id)
        if not clip:
            raise HTTPException(status_code=404, detail="Clip not found")

        # Resolve the nested section once; a missing or empty ground truth
        # simply means every step is reported as missing below.
        gt_data = (await load_ground_truth(video_id)) or {}
        ground_truth = gt_data.get('ground_truth') or {}

        # NOTE(review): the check is truthiness-based, so a step whose stored
        # value is an empty container counts as missing - confirm that this
        # is intended for clips with no violations/accidents.
        required_steps = ['annotation', 'scenes', 'violations', 'accidents', 'assessment']
        missing_steps = [step for step in required_steps if not ground_truth.get(step)]

        if missing_steps:
            return {
                "success": False, 
                "error": f"Cannot mark complete: missing data for steps: {', '.join(missing_steps)}"
            }

        # Mark all steps as completed to ensure clip is marked complete
        for step_number in range(1, 6):
            progress_tracker.complete_step(video_id, step_number)

        # Mark clip as completed in progress tracker
        progress = progress_tracker.load_clip_progress(video_id)
        progress.overall_status = "completed"
        progress_tracker._save_clip_progress(video_id, progress)

        # Update session statistics
        clips = await video_manager.get_clips()
        clip_ids = [item.video_id for item in clips]

        # Classify every clip in a single pass so the completion status of
        # each clip is only queried once.
        completed_count = in_progress_count = pending_count = 0
        for clip_id in clip_ids:
            if progress_tracker.is_clip_completed(clip_id):
                completed_count += 1
            elif any(
                progress_tracker.is_step_completed(clip_id, step_number)
                for step_number in range(1, 6)
            ):
                # At least one step is done, but not the whole clip.
                in_progress_count += 1
            else:
                pending_count += 1

        session_manager.update_statistics(
            total_clips=len(clip_ids),
            clips_completed=completed_count,
            clips_in_progress=in_progress_count,
            clips_pending=pending_count
        )

        return {
            "success": True,
            "data": {
                "video_id": video_id,
                "status": "completed",
                "completed_count": completed_count,
                "total_clips": len(clip_ids)
            }
        }

    except HTTPException as e:
        # HTTP errors are logged and then propagated unchanged to FastAPI.
        logger.error(f"Failed to mark clip complete: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to mark clip complete: {e}")
        return {"success": False, "error": str(e)}

# Utility endpoints
@app.post("/api/backup")
async def create_backup():
    """Ask the session manager for a backup and report where it was written.

    Returns:
        ``{"success": True, "data": {"backup_file": ...}}`` with the backup
        location as a string, or ``{"success": False, "error": ...}`` when
        the backup fails.
    """
    try:
        backup_location = str(await session_manager.create_backup())
    except Exception as exc:
        logger.error(f"Failed to create backup: {exc}")
        return {"success": False, "error": str(exc)}

    return {
        "success": True,
        "data": {"backup_file": backup_location}
    }

@app.get("/api/logs")
async def get_recent_logs():
    """Return recent application logs.

    Placeholder implementation: no log file is read yet, so the ``logs``
    list in the response is always empty.
    """
    try:
        # A real implementation would typically read these from a log file.
        recent_entries = []
        response = {
            "success": True,
            "data": {"logs": recent_entries}
        }
    except Exception as exc:
        logger.error(f"Failed to get logs: {exc}")
        return {"success": False, "error": str(exc)}

    return response

if __name__ == "__main__":
    # Run with: uv run python -m evaluation.make_dataset.s4_annotate_ground_truth.app
    # The application is referenced by its import string; the remaining
    # server settings are collected in one mapping and passed as keywords.
    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "reload": True,
        "log_level": "info",
    }
    uvicorn.run(
        "evaluation.make_dataset.s4_annotate_ground_truth.app:app",
        **server_options
    )