#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
M74 Verbatim Log Transformer (Simplified Version v2)

This script transforms a Gemini CLI verbatim log file into a structured JSON
format. It focuses on the primary goal of separating User and AI turns and
parsing the session summary epilogue. This version is hardened to handle
multiple observed log formats.
"""

import argparse
import json
import re
import sys
from dataclasses import asdict, dataclass, field, is_dataclass
from pathlib import Path
from typing import List, Optional, Union

# --- Simplified Data Model ---

@dataclass
class UserTurn:
    """One user message taken from a user input box in the log."""
    content: str  # prompt text extracted by parse_turn
    speaker: str = "user"  # fixed tag marking the turn as user-authored

@dataclass
class AITurn:
    """One AI text block (a block introduced by the "✦" marker) from the log."""
    content: str  # block text with the leading "✦" removed by parse_turn
    speaker: str = "ai"  # fixed tag marking the turn as AI-authored

# Type alias covering both kinds of conversation turn produced by parse_turn.
Turn = Union[UserTurn, AITurn]

@dataclass
class ModelUsage:
    """One row of the per-model usage table found in the session epilogue."""
    model_name: str  # first column of the row
    requests: int  # request count column
    input_tokens: int  # input token column; thousands separators are stripped before conversion
    output_tokens: int  # output token column; thousands separators are stripped before conversion

@dataclass
class Epilogue:
    """Session summary parsed from the closing box of the log.

    Every field other than ``raw`` and ``model_usage`` holds the string
    captured by a regex in ``_parse_epilogue`` and stays ``None`` when that
    pattern does not match.
    """
    raw: str  # the epilogue text exactly as it was cut from the log
    tool_calls_total: Optional[str] = None  # digits following "Tool Calls:"
    tool_calls_successful: Optional[str] = None  # digits following "( ✔ "
    tool_calls_failed: Optional[str] = None  # digits following "✖ "
    success_rate: Optional[str] = None  # number before "%" after "Success Rate:"
    user_agreement_rate: Optional[str] = None  # number before "%" after "User Agreement:"
    wall_time: Optional[str] = None  # value following "Wall Time:"
    agent_active_time: Optional[str] = None  # value following "Agent Active:"
    api_time: Optional[str] = None  # value following "API Time:" (may include a parenthesized percentage)
    tool_time: Optional[str] = None  # value following "Tool Time:" (may include a parenthesized percentage)
    model_usage: List[ModelUsage] = field(default_factory=list)  # one entry per matched usage-table row

# --- Custom JSON Encoder ---

class CustomLogEncoder(json.JSONEncoder):
    """JSON encoder that serializes the log's dataclass records.

    Any dataclass *instance* is converted to a plain dict with
    ``dataclasses.asdict`` (which also recurses into nested dataclasses,
    lists and dicts). Checking ``is_dataclass`` instead of a fixed tuple of
    classes means a record type added to the data model later is serialized
    without touching this encoder.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for ``o``.

        Args:
            o: An object the base encoder could not serialize on its own.

        Returns:
            The ``asdict`` form of ``o`` when it is a dataclass instance.

        Raises:
            TypeError: Raised by the base class for any other object.
        """
        # is_dataclass() is also true for dataclass *types*, which asdict()
        # rejects, so classes are explicitly left to the base implementation.
        if is_dataclass(o) and not isinstance(o, type):
            return asdict(o)
        return super().default(o)

# --- Parsing Logic ---

def segment_log(content: str) -> tuple[list[str], str | None]:
    """
    Phase 1: Turn Segmentation.
    Splits the raw log file content into turn blocks and an epilogue.
    This version uses a more precise regex to only identify user input boxes
    and AI text blocks as turn headers.
    """
    # This regex now ONLY finds user input boxes (containing '>') or AI turns (starting with ✦)
    TURN_HEADER_PATTERN = re.compile(r"^(?:╭─+╮\n│\s+>\s|✦ )", re.MULTILINE)
    EPILOGUE_MARKER = "Agent powering down. Goodbye!"

    epilogue = None
    turn_content = content

    epilogue_start_index = content.find(EPILOGUE_MARKER)
    if epilogue_start_index != -1:
        box_start_index = content.rfind("╭─", 0, epilogue_start_index)
        if box_start_index != -1:
            epilogue = content[box_start_index:].strip()
            turn_content = content[:box_start_index]

    matches = list(TURN_HEADER_PATTERN.finditer(turn_content))
    
    if not matches:
        return [], epilogue

    turn_blocks = []
    for i, match in enumerate(matches):
        start_index = match.start()
        end_index = matches[i + 1].start() if i + 1 < len(matches) else len(turn_content)
        turn_blocks.append(turn_content[start_index:end_index])
        
    return turn_blocks, epilogue

def _extract_user_prompt(box_block: str) -> str:
    """Return the prompt text held in the first box of ``box_block`` ("" if none)."""
    body_lines = []
    # Line 0 is the top border; collect the "│ ... │" lines that follow it and
    # stop at the bottom border so later boxes in the block are never read.
    for raw_line in box_block.splitlines()[1:]:
        line = raw_line.strip()
        if not line.startswith("│"):
            break
        body_lines.append(line[1:].removesuffix("│"))

    if not body_lines:
        return ""

    # Same tolerance as the segmentation header: any amount of padding
    # before the ">" prompt marker.
    prompt_mark = re.match(r"\s*>\s?", body_lines[0])
    if prompt_mark is None:
        return ""

    indent = prompt_mark.end()
    parts = [body_lines[0][indent:]]
    for line in body_lines[1:]:
        # Continuation lines are padded to align under the first line's text;
        # drop at most that much padding so deeper indentation survives.
        leading = len(line) - len(line.lstrip())
        parts.append(line[min(leading, indent):])

    return "\n".join(part.rstrip() for part in parts).strip()


def parse_turn(turn_block: str) -> Optional[Turn]:
    """
    Parse a single turn block into a structured dataclass instance.

    A block starting with a box border ("╭─") is treated as a user turn: the
    prompt is read from the lines of that first box only, with the box
    borders and padding removed and multi-line prompts joined with newlines.
    A block starting with "✦" is an AI turn whose content is everything after
    the marker.

    Args:
        turn_block: Raw text of one block as produced by ``segment_log``.

    Returns:
        A ``UserTurn`` or ``AITurn``, or ``None`` when the block is blank, is
        a box without a non-empty "> " prompt, or is any other kind of block.
    """
    stripped_block = turn_block.strip()
    if not stripped_block:
        return None

    if stripped_block.startswith("╭─"):  # User Turn
        content = _extract_user_prompt(stripped_block)
        if not content:
            return None
        return UserTurn(content=content)

    if stripped_block.startswith("✦"):  # AI Turn
        return AITurn(content=stripped_block[1:].strip())

    return None  # Ignore system and other turns

# Summary fields, each captured as a string by group 1. The free-form time
# values use horizontal-whitespace classes ([ \t]) so that a match cannot run
# onto the following line when the log has no right-hand box border to stop it.
_EPILOGUE_FIELD_PATTERNS = {
    "tool_calls_total": re.compile(r"Tool Calls:\s+(\d+)"),
    "tool_calls_successful": re.compile(r"\( ✔ (\d+)"),
    "tool_calls_failed": re.compile(r"✖ (\d+)"),
    "success_rate": re.compile(r"Success Rate:\s+([\d\.]+)%"),
    "user_agreement_rate": re.compile(r"User Agreement:\s+([\d\.]+)%"),
    "wall_time": re.compile(r"Wall Time:[ \t]+([\w \t\.]+)"),
    "agent_active_time": re.compile(r"Agent Active:[ \t]+([\w \t\.]+)"),
    "api_time": re.compile(r"API Time:[ \t]+([\w \t\.\(\)%]+)"),
    "tool_time": re.compile(r"Tool Time:[ \t]+([\w \t\.\(\)%]+)"),
}

# One bordered table row: model name, request count, input tokens, output
# tokens. All three numeric columns accept thousands separators.
_MODEL_USAGE_ROW_PATTERN = re.compile(
    r"│\s+([\w\.-]+)\s+([\d,]+)\s+([\d,]+)\s+([\d,]+)\s+│"
)


def _parse_epilogue(epilogue_text: str) -> Optional[Epilogue]:
    """Parse the epilogue block into an ``Epilogue`` record.

    Args:
        epilogue_text: The epilogue text cut from the log, or ``None``/"" when
            the log has no epilogue.

    Returns:
        ``None`` for empty input. Otherwise an ``Epilogue`` whose ``raw`` is
        the input text, whose summary fields are the stripped regex captures
        (left at ``None`` when a pattern does not match), and whose
        ``model_usage`` has one ``ModelUsage`` per matching table row.
    """
    if not epilogue_text:
        return None

    summary = {"raw": epilogue_text}
    for key, pattern in _EPILOGUE_FIELD_PATTERNS.items():
        match = pattern.search(epilogue_text)
        if match:
            summary[key] = match.group(1).strip()

    # Rows are matched one line at a time so the pattern's \s+ gaps cannot
    # stitch two table lines together.
    model_usage_list = []
    for line in epilogue_text.split("\n"):
        row = _MODEL_USAGE_ROW_PATTERN.search(line)
        if row:
            name, requests, input_tokens, output_tokens = row.groups()
            model_usage_list.append(ModelUsage(
                model_name=name,
                requests=int(requests.replace(",", "")),
                input_tokens=int(input_tokens.replace(",", "")),
                output_tokens=int(output_tokens.replace(",", "")),
            ))
    if model_usage_list:
        summary["model_usage"] = model_usage_list

    return Epilogue(**summary)

def main():
    """Drive the log transformation from the command line.

    Reads the log named by the ``input_file`` argument, segments and parses
    it, and emits one JSON document with the keys ``log_filename``,
    ``session_id``, ``epilogue`` and ``conversation``. The document goes to
    the ``-o/--output_file`` path when given, otherwise to stdout.

    Exits with status 1 (after printing a message to stderr) when the input
    is not a file, cannot be read, or the output cannot be written.
    """
    parser = argparse.ArgumentParser(description="Transform a Gemini CLI verbatim log to structured JSON.")
    parser.add_argument("input_file", type=Path, help="Path to the input verbatim log file.")
    parser.add_argument("-o", "--output_file", type=Path, help="Path to the output JSON file (optional). Defaults to stdout.")
    args = parser.parse_args()

    if not args.input_file.is_file():
        print(f"Error: Input file not found at {args.input_file}", file=sys.stderr)
        sys.exit(1)

    try:
        log_content = args.input_file.read_text(encoding='utf-8')
    except Exception as e:
        print(f"Error: Could not read input file: {e}", file=sys.stderr)
        sys.exit(1)

    turn_blocks, epilogue_text = segment_log(log_content)

    # parse_turn returns None for blocks it does not recognize; drop those.
    candidate_turns = (parse_turn(block) for block in turn_blocks)
    parsed_conversation = [turn for turn in candidate_turns if turn]

    output_data = {
        "log_filename": args.input_file.name,
        # Only a leading "gemini-" is a naming prefix; the same text later in
        # the file name is part of the session id and must be kept.
        "session_id": args.input_file.stem.removeprefix("gemini-"),
        "epilogue": _parse_epilogue(epilogue_text),
        "conversation": parsed_conversation,
    }

    try:
        # Serialize once; both destinations receive the identical document.
        serialized = json.dumps(output_data, indent=2, ensure_ascii=False, cls=CustomLogEncoder)
        if args.output_file:
            args.output_file.write_text(serialized, encoding='utf-8')
            print(f"Successfully wrote structured log to {args.output_file}")
        else:
            print(serialized)
    except Exception as e:
        print(f"Error: Could not write output: {e}", file=sys.stderr)
        sys.exit(1)

# Run the transformer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
