import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from scipy import sparse

class TabularDataset(Dataset):
    """Map-style dataset over a feature matrix, one sample per row.

    The constructor reads ``features.shape[0]`` and ``features.shape[1]``,
    so ``features`` must expose a ``shape`` with at least two entries.
    The object is stored as given (no copy, no conversion).

    Attributes:
        features: The wrapped feature matrix.
        dim: Size of the second axis (``features.shape[1]``).
        num_samples: Size of the first axis (``features.shape[0]``).
    """

    def __init__(self, features):
        row_count, column_count = features.shape[0], features.shape[1]
        self.features = features
        self.dim = column_count
        self.num_samples = row_count

    def __getitem__(self, idx):
        """Return whatever ``features[idx]`` yields for the given index."""
        sample = self.features[idx]
        return sample

    def __len__(self):
        """Return the length of the wrapped feature container."""
        return len(self.features)

def _read_feature_matrix(file_path):
    """Read a tabular file and return its contents as a float32 NumPy array."""
    # str() lets callers pass pathlib.Path objects as well as plain strings.
    path = str(file_path)
    # Match extensions case-insensitively so "DATA.CSV" / "Book.XLSX" work too.
    lowered = path.lower()

    if lowered.endswith('.csv'):
        data = pd.read_csv(path)
    elif lowered.endswith(('.xlsx', '.xls')):
        data = pd.read_excel(path)
    elif lowered.endswith('.npz'):
        # .npz is treated as a SciPy sparse matrix and densified.
        data = sparse.load_npz(path).toarray()
    else:
        try:
            data = pd.read_csv(path)  # Try CSV as default
        except Exception as err:
            # Only ordinary errors are translated; KeyboardInterrupt and
            # SystemExit propagate. The original cause stays attached.
            raise ValueError(
                f"Unsupported file format: {path.split('.')[-1]}"
            ) from err

    if isinstance(data, pd.DataFrame):
        return data.values.astype(np.float32)
    return np.asarray(data, dtype=np.float32)


def load_tabular_data(file_path, test_size=0.1, val_size=0.1, random_state=42):
    """Load a tabular file, split it, and standardize it without leakage.

    The file is read as CSV (``.csv``), Excel (``.xlsx``/``.xls``) or a SciPy
    sparse matrix (``.npz``); any other extension is attempted as CSV. Every
    column is converted to ``float32``. Rows are split into train/validation/
    test subsets first, and the ``StandardScaler`` is then fitted on the
    training subset only, so validation and test statistics never influence
    the training features.

    Args:
        file_path: Path to the data file (string or path-like).
        test_size: Fraction of all rows placed in the test set; must lie
            strictly between 0 and 1.
        val_size: Fraction of all rows placed in the validation set; must be
            positive and satisfy ``test_size + val_size < 1``.
        random_state: Seed passed to both ``train_test_split`` calls.

    Returns:
        A tuple ``(train_dataset, val_dataset, test_dataset, scaler)`` where
        the datasets are ``TabularDataset`` instances and ``scaler`` is the
        ``StandardScaler`` fitted on the training split.

    Raises:
        ValueError: If the split fractions are invalid, or if a file with an
            unrecognized extension cannot be parsed as CSV.
    """
    # Validate the split fractions before any I/O so mistakes fail fast with
    # a clear message instead of a ZeroDivisionError or a late sklearn error.
    if not 0 < test_size < 1:
        raise ValueError(
            f"test_size must be strictly between 0 and 1, got {test_size!r}"
        )
    # val_size is a fraction of the whole data set; rescale it to a fraction
    # of what remains after the test rows have been removed.
    val_fraction = val_size / (1 - test_size)
    if not 0 < val_fraction < 1:
        raise ValueError(
            "val_size must be positive and test_size + val_size must be "
            f"less than 1, got test_size={test_size!r}, val_size={val_size!r}"
        )

    features = _read_feature_matrix(file_path)

    # Split the raw rows first. The seed and sample counts are unchanged, so
    # the row assignment matches splitting after scaling.
    train_val_raw, test_raw = train_test_split(
        features, test_size=test_size, random_state=random_state
    )
    train_raw, val_raw = train_test_split(
        train_val_raw, test_size=val_fraction, random_state=random_state
    )

    # Fit on the training rows only, then apply the same transform elsewhere.
    scaler = StandardScaler()
    train_features = scaler.fit_transform(train_raw)
    val_features = scaler.transform(val_raw)
    test_features = scaler.transform(test_raw)

    # Create datasets
    train_dataset = TabularDataset(torch.FloatTensor(train_features))
    val_dataset = TabularDataset(torch.FloatTensor(val_features))
    test_dataset = TabularDataset(torch.FloatTensor(test_features))

    return train_dataset, val_dataset, test_dataset, scaler

def create_data_loaders(train_dataset, val_dataset, test_dataset, batch_size=64):
    """Wrap the three dataset splits in ``DataLoader`` objects.

    All loaders use the same ``batch_size``. Only the training loader is
    created with ``shuffle=True``; the validation and test loaders are
    created with ``shuffle=False``.

    Returns:
        A tuple ``(train_loader, val_loader, test_loader)``.
    """
    def _build(dataset, reshuffle):
        return DataLoader(dataset, batch_size=batch_size, shuffle=reshuffle)

    return (
        _build(train_dataset, True),
        _build(val_dataset, False),
        _build(test_dataset, False),
    )
