#!/usr/bin/env python3
"""
Simple script to convert PhysioNet 2012 data to Raindrop format
"""

import os
from baselines.raindrop_preprocessing_converter import convert_to_raindrop_format

def _find_psv_files(data_dir):
    """Return the paths of all ``.psv`` files found anywhere under *data_dir*."""
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(data_dir)
        for name in names
        if name.endswith('.psv')
    ]


def _report_output_files(output_dir):
    """Print the status of every expected output file under *output_dir*.

    Returns:
        The list of expected file paths that could not be found (empty when
        every expected file is present).
    """
    required_files = [
        os.path.join(output_dir, 'processed_data', 'PTdict_list.npy'),
        os.path.join(output_dir, 'processed_data', 'arr_outcomes.npy'),
    ]
    required_files += [
        os.path.join(output_dir, 'splits', f'phy12_split_subset{i}.npy')
        for i in range(1, 6)
    ]

    missing = []
    print("\n Checking created files:")
    for file_path in required_files:
        try:
            # A single stat call: a failure here means the file is absent
            # (or unreadable), which is reported the same way.
            size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
        except OSError:
            print(f" {file_path} - MISSING")
            missing.append(file_path)
        else:
            print(f" {file_path} ({size:.1f} MB)")
    return missing


def main(data_dir="/home/dcm.aau.dk/km20bf/Biomarker_FeatureGroup_GNAN/tmp",
         output_dir="baselines/Raindrop/P12data_converted"):
    """Convert PhysioNet 2012 data to Raindrop format.

    The function checks that *data_dir* exists and contains at least one
    ``.psv`` file, calls ``convert_to_raindrop_format(data_dir, output_dir)``
    and finally verifies that the expected ``.npy`` output files are present
    under *output_dir*.

    Args:
        data_dir: Directory that is searched recursively for ``.psv`` input
            files. Defaults to the path originally hard-coded in this script.
        output_dir: Directory handed to the converter and checked for the
            expected output files afterwards.

    Returns:
        ``True`` when the conversion ran without raising and every expected
        output file exists; ``False`` otherwise (missing input directory, no
        PSV files, an exception during conversion, or missing output files).
    """
    banner = "=" * 50
    print(banner)
    print("CONVERTING PHYSIONET 2012 DATA TO RAINDROP FORMAT")
    print(banner)

    print(f"Input data directory: {data_dir}")
    print(f"Output directory: {output_dir}")

    # Check if data directory exists
    if not os.path.exists(data_dir):
        print(f" Error: Data directory '{data_dir}' not found!")
        return False

    # Check for PSV files (pipe-separated values)
    psv_files = _find_psv_files(data_dir)
    if not psv_files:
        print(f" Error: No PSV files found in '{data_dir}'!")
        return False

    print(f" Found {len(psv_files)} PSV files")

    # Convert data to Raindrop format
    print("\n Converting data to Raindrop format...")
    try:
        convert_to_raindrop_format(data_dir, output_dir)
    except Exception as e:
        # Top-level boundary of the script: report the problem and signal
        # failure through the return value instead of a traceback.
        print(f" Error during data conversion: {e}")
        return False
    print(" Data conversion completed!")

    # Only claim success when every expected output file was actually written;
    # a converter that returns without producing them must count as a failure.
    missing = _report_output_files(output_dir)
    if missing:
        print(f"\n Error: {len(missing)} expected output file(s) missing in '{output_dir}'!")
        return False

    print("\n Data conversion completed successfully!")
    print("You can now run the MTGNN baseline using:")
    print("sbatch baselines/scripts/run_MTGNN_P12.sh")

    return True

if __name__ == "__main__":
    # Script entry point: run the conversion and report failure both on
    # stdout and through the process exit status.
    success = main()
    if not success:
        print("\n Data conversion failed. Please check the errors above.")
        # Exit non-zero so shells and batch schedulers can detect the failure
        # and avoid launching follow-up jobs on incomplete data.
        raise SystemExit(1)