#!/usr/bin/env python3
"""
Convert .npy files generated by generate_data.py to RDS format (.rds files)
using Rscript with reticulate instead of rpy2.
"""

import os
import subprocess

# Configuration
# Both directories are relative paths, so they are resolved against the
# current working directory when the script runs.
# Root of the input .npy files; one "n{n}" subdirectory per sample size.
NPY_DIR = "covariance_data"
# Root of the converted .rds files; uses the same "n{n}" subdirectory layout.
RDS_DIR = "covariance_data_rds"
# Sample sizes to process.
SAMPLE_SIZES = [500, 1250, 2500]
# Replications per sample size; seeds run from 1 to NUM_REP inclusive.
NUM_REP = 200

def convert_npy_to_rds(n, seed):
    """
    Convert one dataset from .npy to .rds format.

    The inputs are ``seed{seed:03d}_X.npy``, ``_M.npy`` and ``_C.npy`` under
    ``NPY_DIR/n{n}``; the expected output is ``RDS_DIR/n{n}/seed{seed:03d}.rds``.
    The conversion itself is delegated to ``convert_npy_to_rds.R`` (looked up
    next to this script) and run through ``Rscript`` with a 30 second timeout.

    Args:
        n: Sample size; selects the ``n{n}`` subdirectory.
        seed: Replication index; zero-padded to three digits in file names.

    Returns:
        A ``(n, seed, status)`` tuple where ``status`` is ``"ok"``,
        ``"exists"`` (output already present, nothing done), ``"not_found"``
        (at least one input file is missing) or a string starting with
        ``"error: "`` followed by a description of the failure.

    Failures while creating the output directory or running ``Rscript`` are
    reported through the status instead of being raised, so a batch run can
    continue with the next dataset.
    """
    tag = f"[n={n}] seed={seed:03d}"
    stem = f"seed{seed:03d}"
    n_dir_npy = os.path.join(NPY_DIR, f"n{n}")
    n_dir_rds = os.path.join(RDS_DIR, f"n{n}")
    rds_path = os.path.join(n_dir_rds, stem + ".rds")

    # Skip if RDS already exists
    if os.path.exists(rds_path):
        print(f"{tag} RDS already exists, skipping")
        return (n, seed, "exists")

    # Check inputs before touching the output tree, so that missing datasets
    # do not leave empty output directories behind.
    npy_paths = [
        os.path.join(n_dir_npy, f"{stem}_{part}.npy") for part in ("X", "M", "C")
    ]
    if not all(os.path.exists(path) for path in npy_paths):
        print(f"{tag} .npy files not found, skipping")
        return (n, seed, "not_found")

    try:
        os.makedirs(n_dir_rds, exist_ok=True)

        script_dir = os.path.dirname(os.path.abspath(__file__))
        r_script = os.path.join(script_dir, "convert_npy_to_rds.R")

        result = subprocess.run(
            ["Rscript", r_script, NPY_DIR, RDS_DIR, str(n), str(seed)],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        shown, detail = "Timeout", "timeout"
    except Exception as e:
        # Deliberately broad: this is the per-dataset boundary of a batch job
        # (missing Rscript binary, permission problems, undecodable output...).
        shown = detail = str(e)
    else:
        if result.returncode == 0:
            print(f"{tag} converted successfully")
            return (n, seed, "ok")
        # Prefer stderr, fall back to stdout, and never report an empty message.
        shown = detail = (
            (result.stderr or "").strip()
            or (result.stdout or "").strip()
            or f"Rscript exited with code {result.returncode}"
        )

    # The output did not exist when we started, so anything at rds_path now
    # was written by the failed run and may be truncated. Remove it so that a
    # later run retries instead of skipping it as "already exists".
    try:
        os.remove(rds_path)
    except FileNotFoundError:
        pass
    except OSError as e:
        print(f"{tag} WARNING: could not remove partial output {rds_path}: {e}")

    print(f"{tag} ERROR: {shown}")
    return (n, seed, f"error: {detail}")


def main():
    """
    Convert every configured dataset and print a summary.

    Calls ``convert_npy_to_rds`` for each sample size in ``SAMPLE_SIZES`` and
    each seed from 1 to ``NUM_REP``, tallies the returned statuses, and prints
    the counts followed by at most the first ten error messages.
    """
    print("Converting .npy files to RDS format")
    print(f"Source directory: {NPY_DIR}")
    print(f"Output directory: {RDS_DIR}")
    print(f"Sample sizes: {SAMPLE_SIZES}")
    print(f"Replications per n: {NUM_REP}")

    # Create output directory
    os.makedirs(RDS_DIR, exist_ok=True)

    # Process all files, counting each outcome separately so that datasets
    # with missing inputs are not reported as "already existed".
    errors = []
    total = 0
    converted = 0
    existed = 0
    missing = 0

    for n in SAMPLE_SIZES:
        for seed in range(1, NUM_REP + 1):
            total += 1
            _, _, status = convert_npy_to_rds(n, seed)
            if status == "ok":
                converted += 1
            elif status == "exists":
                existed += 1
            elif status == "not_found":
                missing += 1
            else:
                # "error: ..." statuses; anything unrecognised is also treated
                # as an error so the counts always add up to the total.
                errors.append((n, seed, status))

    print("\n=== Summary ===")
    print(f"Total files: {total}")
    print(f"Successfully converted: {converted}")
    print(f"Already existed: {existed}")
    print(f"Input files not found: {missing}")
    print(f"Errors: {len(errors)}")

    if errors:
        print("\nErrors:")
        for n, seed, msg in errors[:10]:
            print(f"  n={n}, seed={seed:03d}: {msg}")
        if len(errors) > 10:
            print(f"  ... and {len(errors) - 10} more")

    print(f"\nRDS files saved in: {RDS_DIR}")
    print("Done.")


# Run the batch conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()

