#!/usr/bin/env python3
"""
Convert .npy files generated by generate_data.py to RDS format (.rds files)
that R can read natively without Python dependencies.
"""

import os
import numpy as np
import rpy2.robjects as ro
from rpy2.robjects import numpy2ri
from rpy2.robjects.packages import importr

# Activate the global numpy -> R conversion so numpy arrays handed to rpy2
# are converted to R objects.
# NOTE(review): newer rpy2 releases appear to deprecate the global
# activate() in favour of local converter contexts — confirm against the
# rpy2 version this script is run with.
numpy2ri.activate()

# Handle to R's "base" package; convert_npy_to_rds() calls base.saveRDS on it.
base = importr('base')

# Configuration
# NPY_DIR: root of the input tree; inputs are read from NPY_DIR/n{n}/.
# RDS_DIR: root of the output tree; outputs are written to RDS_DIR/n{n}/.
# SAMPLE_SIZES: the values of n that main() iterates over.
# NUM_REP: number of replications per n; main() uses seeds 1..NUM_REP.
NPY_DIR = "covariance_data"
RDS_DIR = "covariance_data_rds"
SAMPLE_SIZES = [250, 500, 1250, 2500]
NUM_REP = 200

def convert_npy_to_rds(n, seed):
    """
    Convert one dataset from .npy to .rds format.

    Reads ``seed{seed:03d}_X.npy``, ``seed{seed:03d}_M.npy`` and
    ``seed{seed:03d}_C.npy`` from ``NPY_DIR/n{n}``, bundles them into an R
    named list with elements ``X``, ``M`` and ``C``, and saves that list
    with R's ``saveRDS`` as ``RDS_DIR/n{n}/seed{seed:03d}.rds``.

    The RDS file is written to a temporary sibling path first and then
    renamed into place. Because an existing ``.rds`` causes the dataset to
    be skipped, a partially written file left behind by a crash or a failed
    save would otherwise be mistaken for a finished conversion forever.

    Args:
        n: Sample size; selects the ``n{n}`` sub-directory.
        seed: Replication index; formatted as a zero-padded 3-digit number.

    Returns:
        A tuple ``(n, seed, status)`` where ``status`` is one of ``"ok"``,
        ``"exists"``, ``"not_found"`` or ``"error: <message>"``. Exceptions
        raised while loading, converting or saving are caught and reported
        through the ``"error: ..."`` status rather than propagated.
    """
    n_dir_npy = os.path.join(NPY_DIR, f"n{n}")
    n_dir_rds = os.path.join(RDS_DIR, f"n{n}")
    os.makedirs(n_dir_rds, exist_ok=True)

    prefix_npy = os.path.join(n_dir_npy, f"seed{seed:03d}")
    prefix_rds = os.path.join(n_dir_rds, f"seed{seed:03d}")

    x_path = prefix_npy + "_X.npy"
    m_path = prefix_npy + "_M.npy"
    c_path = prefix_npy + "_C.npy"
    rds_path = prefix_rds + ".rds"

    # Skip if RDS already exists
    if os.path.exists(rds_path):
        print(f"[n={n}] seed={seed:03d} RDS already exists, skipping")
        return (n, seed, "exists")

    # Check if .npy files exist
    if not all(os.path.exists(p) for p in (x_path, m_path, c_path)):
        print(f"[n={n}] seed={seed:03d} .npy files not found, skipping")
        return (n, seed, "not_found")

    # Temporary file lives next to the target so os.replace stays on the
    # same filesystem; the PID keeps concurrent runs from clobbering each
    # other's in-progress files.
    tmp_path = f"{rds_path}.{os.getpid()}.tmp"

    try:
        # Load .npy files (shapes as noted by the data generator:
        # X is n x 4, M and C are n x q x q).
        X_np = np.load(x_path)
        M_np = np.load(m_path)
        C_np = np.load(c_path)

        # Create a named list in R from the converted arrays
        data_list = ro.ListVector({
            'X': numpy2ri.numpy2rpy(X_np),
            'M': numpy2ri.numpy2rpy(M_np),
            'C': numpy2ri.numpy2rpy(C_np),
        })

        # Save as RDS, then publish under the final name in one step so
        # rds_path only ever refers to a completely written file.
        base.saveRDS(data_list, tmp_path)
        os.replace(tmp_path, rds_path)

    except Exception as e:
        # Best-effort cleanup of a partial temp file; a failure here must
        # not mask the original error or abort the batch.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        print(f"[n={n}] seed={seed:03d} ERROR: {e}")
        return (n, seed, f"error: {e}")

    print(f"[n={n}] seed={seed:03d} converted successfully")
    return (n, seed, "ok")


def main():
    """
    Convert every configured dataset and print a summary.

    Iterates over each sample size in ``SAMPLE_SIZES`` and each seed in
    ``1..NUM_REP``, calls ``convert_npy_to_rds`` for the pair, and tallies
    the returned status. Each status is counted explicitly so that datasets
    whose .npy inputs are missing are reported separately instead of being
    folded into the "Already existed" figure.

    Prints progress headers, a summary of the counts, and up to the first
    ten error messages. Returns nothing.
    """
    print("Converting .npy files to RDS format")
    print(f"Source directory: {NPY_DIR}")
    print(f"Output directory: {RDS_DIR}")
    print(f"Sample sizes: {SAMPLE_SIZES}")
    print(f"Replications per n: {NUM_REP}")

    # Create output directory
    os.makedirs(RDS_DIR, exist_ok=True)

    # Process all files
    errors = []
    total = 0
    converted = 0
    existed = 0
    missing = 0

    for n in SAMPLE_SIZES:
        for seed in range(1, NUM_REP + 1):
            total += 1
            _, _, status = convert_npy_to_rds(n, seed)
            if status == "ok":
                converted += 1
            elif status == "exists":
                existed += 1
            elif status == "not_found":
                missing += 1
            else:
                # Anything else is an "error: ..." status from the converter.
                errors.append((n, seed, status))

    print("\n=== Summary ===")
    print(f"Total files: {total}")
    print(f"Successfully converted: {converted}")
    print(f"Already existed: {existed}")
    print(f"Source .npy not found: {missing}")
    print(f"Errors: {len(errors)}")

    if errors:
        print("\nErrors:")
        # Cap the listing so a systematic failure does not flood the output.
        for n, seed, msg in errors[:10]:
            print(f"  n={n}, seed={seed:03d}: {msg}")
        if len(errors) > 10:
            print(f"  ... and {len(errors) - 10} more")

    print(f"\nRDS files saved in: {RDS_DIR}")
    print("Done.")


# Run the batch conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()

