#!/usr/bin/env python3
"""
Get representative sample data from each table in the database.
This tool is available to subagents for database analysis.
"""

import sqlite3
import json
import sys
from pathlib import Path

def get_sample_data(db_path: str, sample_size: int = 5) -> dict:
    """Get the row count, column names and sample rows for each table.

    The database is opened read-only. Every table listed in
    ``sqlite_master`` is inspected, except SQLite's internal tables whose
    names start with ``sqlite_``. Tables holding at most
    ``sample_size * 2`` rows are returned in full. Larger tables contribute
    ``sample_size`` rows: the first rows SQLite yields plus the rows with
    the highest rowids. Tables without a rowid fall back to the first
    ``sample_size`` rows.

    Args:
        db_path: Filesystem path of the SQLite database.
        sample_size: Number of rows to sample from large tables. Must not
            be negative.

    Returns:
        A dict mapping each table name to a dict with the keys
        ``'row_count'`` (int), ``'sample_rows'`` (list of
        column-name -> value dicts) and ``'column_names'`` (list of str).

    Raises:
        ValueError: If ``sample_size`` is negative.
        sqlite3.Error: If the database cannot be opened or queried.
    """
    from urllib.parse import quote

    # SQLite treats a negative LIMIT as "no limit", which would dump whole
    # tables instead of a sample, so reject it up front.
    if sample_size < 0:
        raise ValueError(f"sample_size must be non-negative, got {sample_size}")

    # Split so that head + tail is exactly sample_size, also for odd sizes.
    tail_count = sample_size // 2
    head_count = sample_size - tail_count

    samples = {}

    # Percent-encode the path so characters such as '?', '#' and '%' are not
    # interpreted as URI syntax.
    conn = sqlite3.connect(f'file:{quote(str(db_path))}?mode=ro', uri=True)
    try:
        cursor = conn.cursor()

        # '_' is a single-character wildcard in LIKE; escape it so that only
        # names really starting with "sqlite_" are skipped.
        cursor.execute(
            "SELECT name FROM sqlite_master "
            "WHERE type='table' AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\'"
        )
        tables = [row[0] for row in cursor.fetchall()]

        for table in tables:
            # Double embedded quotes so any table name is a valid identifier.
            ident = '"' + table.replace('"', '""') + '"'

            cursor.execute(f'SELECT COUNT(*) FROM {ident}')
            row_count = cursor.fetchone()[0]

            # Take the names from the result set of SELECT * itself so they
            # always line up with the row tuples. Per the SQLite
            # documentation, PRAGMA table_info omits generated columns that
            # SELECT * does return.
            cursor.execute(f'SELECT * FROM {ident} LIMIT 0')
            column_names = [col[0] for col in cursor.description]

            sample_rows = []
            if row_count > 0:
                if row_count <= sample_size * 2:
                    # Small table: return everything.
                    cursor.execute(f'SELECT * FROM {ident}')
                else:
                    try:
                        # Rows from the start plus the highest rowids.
                        cursor.execute(f"""
                            SELECT * FROM (
                                SELECT * FROM {ident} LIMIT {head_count}
                            )
                            UNION ALL
                            SELECT * FROM (
                                SELECT * FROM {ident}
                                ORDER BY rowid DESC
                                LIMIT {tail_count}
                            )
                            LIMIT {sample_size}
                        """)
                    except sqlite3.OperationalError as e:
                        if "no such column: rowid" in str(e):
                            # Table has no rowid: just take the first N rows.
                            cursor.execute(
                                f'SELECT * FROM {ident} LIMIT {sample_size}'
                            )
                        else:
                            raise

                sample_rows = [
                    dict(zip(column_names, row))
                    for row in cursor.fetchall()
                ]

            samples[table] = {
                'row_count': row_count,
                'sample_rows': sample_rows,
                'column_names': column_names,
            }
    finally:
        # Release the connection even when a query fails.
        conn.close()

    return samples

def main():
    """Command-line entry point: print table samples as JSON.

    Reads the database path from ``sys.argv[1]`` and an optional sample
    size (default 5) from ``sys.argv[2]``. On success the samples are
    printed to stdout as indented JSON; values that JSON cannot encode are
    converted with ``str``. On any usage or database error a message is
    written to stderr and the process exits with status 1.
    """
    if len(sys.argv) < 2:
        print("Usage: python sample_data.py <database.sqlite> [sample_size]",
              file=sys.stderr)
        sys.exit(1)

    db_path = sys.argv[1]

    # Report a bad sample_size as a clean CLI error instead of a traceback.
    try:
        sample_size = int(sys.argv[2]) if len(sys.argv) > 2 else 5
    except ValueError:
        print(f"Error: sample_size must be an integer: {sys.argv[2]}",
              file=sys.stderr)
        sys.exit(1)
    if sample_size < 0:
        print(f"Error: sample_size must be non-negative: {sample_size}",
              file=sys.stderr)
        sys.exit(1)

    # is_file() also rejects directories, which exists() would let through.
    if not Path(db_path).is_file():
        print(f"Error: Database not found: {db_path}", file=sys.stderr)
        sys.exit(1)

    try:
        samples = get_sample_data(db_path, sample_size)
    except sqlite3.Error as exc:
        # E.g. the file exists but is not a readable SQLite database.
        print(f"Error: Could not read database: {exc}", file=sys.stderr)
        sys.exit(1)

    print(json.dumps(samples, indent=2, default=str))

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()