#!/usr/bin/env python3
"""
File manager for hierarchical output layouts.
Layout: outputs/<dataset>/<NUMframes>/<SAMPLER[_variant]>/<TIMESTAMP>/
"""

import os
import json
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional, List, Tuple


class OutputFileManager:
    """Create and locate preprocess session directories.

    Sessions are stored under
    ``<base_output_path>/<dataset_name>/<num_frames>frames/<sampler_type>[_<variant>]/<timestamp>/``
    where ``<timestamp>`` is formatted as ``%Y%m%d_%H%M%S``.
    """

    # Logical name -> file name of every artifact exposed for a session directory.
    # Insertion order is the order in which keys appear in returned dicts.
    _SESSION_FILES = {
        "indices": "indices.json",
        "timing": "timing.json",
        "errors": "errors.json",
        "error_counts": "error_counts.json",
    }

    def __init__(self, base_output_path: str, dataset_name: str):
        """
        Initialize file manager.

        Args:
            base_output_path: Base output path (e.g., ./outputs).
            dataset_name: Dataset name (e.g., longvideobench).
        """
        self.base_path = Path(base_output_path)
        self.dataset_name = dataset_name
        self.dataset_path = self.base_path / dataset_name

    def _build_sampler_dir(self, num_frames: int, sampler_type: str, variant: Optional[str] = None) -> Path:
        """
        Build sampler directory path, optionally appending a variant tag.

        The variant is stripped and its spaces are replaced by underscores.
        A variant that is falsy, or empty after stripping, is treated as
        "no variant" so the directory name never ends in a dangling ``_``.

        Args:
            num_frames: Frame count used for the ``<num_frames>frames`` level.
            sampler_type: Sampler name.
            variant: Optional variant tag appended as ``_<variant>``.

        Returns:
            The sampler directory path (not created by this method).
        """
        frames_dir = self.dataset_path / f"{num_frames}frames"
        # Normalise BEFORE deciding whether a tag exists: "   " must not
        # produce "<sampler>_".
        tag = str(variant).strip().replace(" ", "_") if variant else ""
        sampler_name = f"{sampler_type}_{tag}" if tag else sampler_type
        return frames_dir / sampler_name

    def _resolve_sampler_dir(self, num_frames: int, sampler_type: str, variant: Optional[str] = None) -> Optional[Path]:
        """
        Return the existing sampler directory to read sessions from.

        If the variant-specific directory is missing and a variant was given,
        the variant-less directory is tried for backward compatibility.

        Returns:
            The directory path, or None when no suitable directory exists.
        """
        sampler_dir = self._build_sampler_dir(num_frames, sampler_type, variant)
        # is_dir() rather than exists(): a stray regular file at this path
        # must not reach iterdir(), which would raise NotADirectoryError.
        if sampler_dir.is_dir():
            return sampler_dir
        if variant:
            fallback_dir = self._build_sampler_dir(num_frames, sampler_type, None)
            if fallback_dir.is_dir():
                return fallback_dir
        return None

    def _session_dirs(self, num_frames: int, sampler_type: str, variant: Optional[str] = None) -> List[Path]:
        """
        Return session directories for a sampler setup, newest first.

        Every sub-directory of the sampler directory counts as a session;
        ordering is by directory name in descending order, which is
        chronological for ``%Y%m%d_%H%M%S`` names.
        """
        sampler_dir = self._resolve_sampler_dir(num_frames, sampler_type, variant)
        if sampler_dir is None:
            return []
        return sorted(
            (entry for entry in sampler_dir.iterdir() if entry.is_dir()),
            key=lambda entry: entry.name,
            reverse=True,
        )

    @staticmethod
    def _load_json(json_file: Path):
        """
        Read and parse a UTF-8 JSON file.

        Returns:
            The parsed value, or None if the file is missing, unreadable,
            not valid UTF-8, or not valid JSON.
        """
        try:
            with open(json_file, "r", encoding="utf-8") as f:
                return json.load(f)
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError: covers both
            # json.JSONDecodeError and UnicodeDecodeError.
            return None

    def create_preprocess_session(self, num_frames: int, sampler_type: str, variant: Optional[str] = None) -> Dict[str, str]:
        """
        Create a new preprocess session directory.

        The directory is named after the current local time with one-second
        resolution and is created with ``exist_ok=True``, so two calls within
        the same second for the same sampler setup share one directory.

        Args:
            num_frames: Frame count of the sampler setup.
            sampler_type: Sampler name.
            variant: Optional variant tag.

        Returns:
            Dict with ``session_dir``, ``timestamp`` and the path of each
            session artifact (``indices``, ``timing``, ``errors``,
            ``error_counts``). Only the directory is created, not the files.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        session_dir = self._build_sampler_dir(num_frames, sampler_type, variant) / timestamp
        session_dir.mkdir(parents=True, exist_ok=True)

        file_paths = {
            "session_dir": str(session_dir),
            "timestamp": timestamp,
        }
        for key, file_name in self._SESSION_FILES.items():
            file_paths[key] = str(session_dir / file_name)
        return file_paths

    def find_latest_session(self, num_frames: int, sampler_type: str, variant: Optional[str] = None) -> Optional[str]:
        """
        Find the latest session directory for a given sampler setup.

        Falls back to the variant-less sampler directory when the
        variant-specific one does not exist.

        Returns:
            Path of the session whose directory name sorts last, or None if
            there is no sampler directory or it contains no sub-directory.
        """
        session_dirs = self._session_dirs(num_frames, sampler_type, variant)
        return str(session_dirs[0]) if session_dirs else None

    def get_session_files(self, session_dir: str, sampler_type: str) -> Dict[str, str]:
        """
        Get file paths under a session directory.

        Args:
            session_dir: Session directory path.
            sampler_type: Unused; kept so existing callers keep working.

        Returns:
            Dict with ``session_dir`` and the path of each session artifact.
            Paths are computed only; nothing is checked or created.
        """
        session_path = Path(session_dir)
        file_paths = {"session_dir": str(session_path)}
        for key, file_name in self._SESSION_FILES.items():
            file_paths[key] = str(session_path / file_name)
        return file_paths

    def list_available_sessions(self, num_frames: int, sampler_type: str, variant: Optional[str] = None) -> List[Tuple[str, str]]:
        """
        List available sessions.

        Falls back to the variant-less sampler directory when the
        variant-specific one does not exist.

        Returns:
            List of (timestamp, full_path) sorted descending.
        """
        return [
            (entry.name, str(entry))
            for entry in self._session_dirs(num_frames, sampler_type, variant)
        ]

    def verify_session_completeness(self, session_dir: str, sampler_type: str) -> Dict[str, bool]:
        """
        Verify that expected files exist under a session directory.

        Args:
            session_dir: Session directory path.
            sampler_type: Forwarded to ``get_session_files``.

        Returns:
            Dict mapping each artifact key to whether its file exists.
        """
        file_paths = self.get_session_files(session_dir, sampler_type)
        return {
            file_type: Path(file_path).exists()
            for file_type, file_path in file_paths.items()
            if file_type != "session_dir"
        }

    def load_indices(self, session_dir: str) -> Optional[List]:
        """
        Load indices.json for a session (returns None on error).
        """
        return self._load_json(Path(session_dir) / "indices.json")

    def load_meta(self, session_dir: str) -> Optional[Dict]:
        """
        Load meta.json for a session (returns None on error).
        """
        return self._load_json(Path(session_dir) / "meta.json")

    def get_summary_info(self, num_frames: int, sampler_type: str, variant: Optional[str] = None) -> Dict:
        """
        Get a summary of sessions for a given sampler setup.

        Returns:
            Dict with ``num_frames``, ``sampler_type``, ``variant``,
            ``total_sessions``, ``latest_session`` (path or None) and
            ``all_sessions``; ``latest_session_completeness`` is added when a
            latest session exists.
        """
        sessions = self.list_available_sessions(num_frames, sampler_type, variant)
        # Derive the latest entry from the same listing (already sorted newest
        # first) so the two fields cannot disagree and the directory is only
        # scanned once.
        latest_session = sessions[0][1] if sessions else None

        summary = {
            "num_frames": num_frames,
            "sampler_type": sampler_type,
            "variant": variant,
            "total_sessions": len(sessions),
            "latest_session": latest_session,
            "all_sessions": sessions,
        }

        if latest_session:
            summary["latest_session_completeness"] = self.verify_session_completeness(
                latest_session, sampler_type
            )

        return summary


def print_file_structure_help():
    """Write a description of the on-disk output layout to stdout.

    The text shows the directory tree, one example path, and the two ways an
    inference run can select preprocess results.
    """
    print("""
Output directory structure:
outputs/
└── {dataset_name}/
    └── {num_frames}frames/
        └── {sampler_type}_{variant}/   # variant optional, e.g. importance/uniform/random
            ├── {timestamp1}/
            │   ├── indices.json     # sampled indices
            │   ├── timing.json      # timings
            │   ├── meta.json        # optional meta (legacy)
            │   ├── errors.json      # error log
            │   └── error_counts.json
            └── {timestamp2}/

Example:
outputs/longvideobench/16frames/scope_importance/20250913_143052/indices.json

In inference, you can:
1) Use the latest results: --sampler scope --num_frames 16
2) Use a specific session: --preprocess_session outputs/longvideobench/16frames/scope_importance/20250913_143052
    """)


if __name__ == "__main__":
    # Local import: only the smoke test needs it.
    import tempfile

    # Example usage
    print_file_structure_help()

    # Smoke test. Run it inside a throwaway directory: creating a session
    # under the real ./outputs tree would leave an empty timestamped directory
    # that find_latest_session() then reports as the latest result.
    with tempfile.TemporaryDirectory() as tmp_root:
        fm = OutputFileManager(tmp_root, "longvideobench")

        # Create a new session
        paths = fm.create_preprocess_session(16, "scope")
        print(f"Created session: {paths['session_dir']}")

        # Find latest session
        latest = fm.find_latest_session(16, "scope")
        print(f"Latest session: {latest}")

        # Summary
        summary = fm.get_summary_info(16, "scope")
        print(f"Summary: {summary}")
