#!/usr/bin/env python3
"""
Convert .fvecs file to .csv format.

FVECS format: Each vector is stored as:
- 4 bytes (int32): dimension of the vector
- dimension * 4 bytes (float32): the vector components

Usage: python fvecs_to_csv.py input.fvecs
Output: Creates input.csv in the same directory
"""

import sys
import struct
import csv
import os
from pathlib import Path


def read_fvecs(filename):
    """
    Read vectors from a .fvecs file.

    Each record is a little-endian int32 dimension followed by that many
    little-endian float32 components. Records are decoded one at a time as
    the generator is consumed.

    Args:
        filename (str): Path to the .fvecs file (anything open() accepts)

    Yields:
        list: Each vector as a list of floats

    Raises:
        ValueError: If a dimension header is truncated or negative, or if
            the file ends before a vector's components are complete.
    """
    with open(filename, 'rb') as f:
        while True:
            # Read dimension (4 bytes, little-endian int32)
            dim_bytes = f.read(4)
            if not dim_bytes:
                break  # Clean end of file on a record boundary

            if len(dim_bytes) < 4:
                raise ValueError("Incomplete dimension data")

            dim = struct.unpack('<i', dim_bytes)[0]

            # Reject negative counts before they reach read(): a negative
            # size makes read() consume the rest of the file, and the
            # length check below would pass because the expected size is
            # negative too.
            if dim < 0:
                raise ValueError(f"Invalid vector dimension: {dim}")

            # Read vector components (dim * 4 bytes, little-endian float32)
            payload_size = dim * 4
            vector_bytes = f.read(payload_size)
            if len(vector_bytes) < payload_size:
                raise ValueError("Incomplete vector data")

            yield list(struct.unpack(f'<{dim}f', vector_bytes))


def fvecs_to_csv(fvecs_path):
    """
    Convert a .fvecs file to .csv format.

    The output is written next to the input, with the extension replaced
    by ``.csv``. The first row is a header ``dim_0, dim_1, ...`` sized from
    the first vector; every following row holds one vector. Progress and
    summary messages are printed to stdout.

    Args:
        fvecs_path (str): Path to the input .fvecs file

    Returns:
        str: Path to the output .csv file

    Raises:
        ValueError: If the output path would be the input file itself, or
            if the vectors in the file do not all share one dimension.
            Errors raised by read_fvecs() or by file I/O propagate after
            the partially written output file has been removed.
    """
    # Create output path by replacing extension
    fvecs_path = Path(fvecs_path)
    csv_path = fvecs_path.with_suffix('.csv')

    # Opening the output with 'w' truncates it immediately, so an input
    # that already ends in .csv (or that resolves to the same file through
    # a link or a case-insensitive filesystem) would be destroyed before a
    # single byte was read. Refuse before touching anything.
    same_file = csv_path == fvecs_path or (
        csv_path.exists()
        and fvecs_path.exists()
        and csv_path.samefile(fvecs_path)
    )
    if same_file:
        raise ValueError(
            f"Output path {csv_path} would overwrite the input file"
        )

    print(f"Converting {fvecs_path} to {csv_path}")

    vector_count = 0
    dimension = None

    try:
        with open(csv_path, 'w', newline='') as csvfile:
            # Rows are already ordered sequences, so a plain writer emits
            # the same bytes as a DictWriter without a dict per row.
            writer = csv.writer(csvfile)

            for vector in read_fvecs(fvecs_path):
                if dimension is None:
                    # The first vector fixes the column count and header
                    dimension = len(vector)
                    writer.writerow([f'dim_{i}' for i in range(dimension)])
                    print(f"Vector dimension: {dimension}")

                # Verify all vectors have the same dimension
                if len(vector) != dimension:
                    raise ValueError(
                        f"Inconsistent vector dimension at vector {vector_count}: "
                        f"expected {dimension}, got {len(vector)}"
                    )

                # Write vector as CSV row
                writer.writerow(vector)
                vector_count += 1

                # Progress indicator for large files
                if vector_count % 10000 == 0:
                    print(f"Processed {vector_count} vectors...")

    except Exception:
        # Clean up partial file on error; the bare raise keeps the
        # original traceback intact.
        if csv_path.exists():
            csv_path.unlink()
        raise

    print(f"Successfully converted {vector_count} vectors to {csv_path}")
    return str(csv_path)


def main():
    """
    Command-line entry point: convert the .fvecs file named in sys.argv[1].

    Expects exactly one argument, the input path. Diagnostics (usage,
    errors, warnings) are written to stderr so they stay separate from the
    normal progress output on stdout. Exits with status 1 on a usage
    error, a missing or directory input path, or any conversion failure.
    """
    if len(sys.argv) != 2:
        print("Usage: python fvecs_to_csv.py <input.fvecs>", file=sys.stderr)
        print("Example: python fvecs_to_csv.py vectors.fvecs", file=sys.stderr)
        sys.exit(1)

    input_file = sys.argv[1]

    # Check if input file exists
    if not os.path.exists(input_file):
        print(f"Error: Input file '{input_file}' does not exist",
              file=sys.stderr)
        sys.exit(1)

    # A directory passes the existence check but cannot be read as a file;
    # reject it here, before the conversion creates any output file.
    if os.path.isdir(input_file):
        print(f"Error: Input path '{input_file}' is a directory",
              file=sys.stderr)
        sys.exit(1)

    # Check if input file has .fvecs extension (warning only)
    if not input_file.lower().endswith('.fvecs'):
        print(f"Warning: Input file '{input_file}' does not have .fvecs extension",
              file=sys.stderr)

    try:
        output_file = fvecs_to_csv(input_file)
        print("\nConversion completed successfully!")
        print(f"Output file: {output_file}")

        # Show file sizes for comparison
        input_size = os.path.getsize(input_file)
        output_size = os.path.getsize(output_file)
        print(f"Input size: {input_size:,} bytes")
        print(f"Output size: {output_size:,} bytes")

    except Exception as e:
        # Top-level boundary: report the failure and signal it through the
        # exit status instead of a traceback.
        print(f"Error during conversion: {e}", file=sys.stderr)
        sys.exit(1)


# Run the converter only when this file is executed as a script, so that
# importing the module does not trigger a conversion.
if __name__ == "__main__":
    main()
    