import math
import os

import pandas as pd
from sklearn.model_selection import train_test_split

def split_data(meta_data_path, train_size=0.7, val_size=0.15, test_size=0.15,
               *, output_dir=".", random_state=42):
    """Split a metadata CSV into train/validation/test CSV files.

    The CSV at ``meta_data_path`` is loaded with pandas and split in two
    steps with ``train_test_split``: first train vs. the rest, then the rest
    into validation and test. The three subsets are written to
    ``train_bw.csv``, ``val_bw.csv`` and ``test_bw.csv`` inside
    ``output_dir`` (without the index column) and a one-line summary of the
    subset sizes is printed.

    Args:
        meta_data_path: Path of the CSV file to split.
        train_size: Fraction of rows for the training subset.
        val_size: Fraction of rows for the validation subset.
        test_size: Fraction of rows for the test subset.
        output_dir: Directory that receives the three CSV files; created if
            it does not exist. Defaults to the current working directory.
        random_state: Seed passed to both ``train_test_split`` calls so the
            split is reproducible.

    Returns:
        A ``(train, val, test)`` tuple of DataFrames.

    Raises:
        ValueError: If any fraction is not strictly between 0 and 1, or if
            the three fractions do not sum to 1 (within floating-point
            tolerance).
    """
    # Validate with explicit exceptions rather than ``assert`` so the checks
    # still run under ``python -O``.
    fractions = {
        "train_size": train_size,
        "val_size": val_size,
        "test_size": test_size,
    }
    for name, value in fractions.items():
        if not 0.0 < value < 1.0:
            raise ValueError(
                f"{name} must be strictly between 0 and 1, got {value!r}"
            )

    # Compare with a tolerance: sums such as 0.7 + 0.2 + 0.1 evaluate to
    # 0.9999999999999999, which an exact ``== 1.0`` check would reject.
    total = train_size + val_size + test_size
    if not math.isclose(total, 1.0):
        raise ValueError(f"Sizes must sum to 1, got {total!r}")

    # Load the CSV
    df = pd.read_csv(meta_data_path)

    # First split into train and temporary (validation + test)
    train, temp = train_test_split(
        df, train_size=train_size, random_state=random_state
    )

    # ``temp`` holds the val + test share of the rows, so the test fraction
    # has to be re-expressed relative to that share.
    test_fraction_of_temp = test_size / (val_size + test_size)
    val, test = train_test_split(
        temp, test_size=test_fraction_of_temp, random_state=random_state
    )

    # Save to separate CSV files
    os.makedirs(output_dir, exist_ok=True)
    train.to_csv(os.path.join(output_dir, 'train_bw.csv'), index=False)
    val.to_csv(os.path.join(output_dir, 'val_bw.csv'), index=False)
    test.to_csv(os.path.join(output_dir, 'test_bw.csv'), index=False)

    print(f"Data split: {len(train)} train, {len(val)} val, {len(test)} test")
    return train, val, test

# Example usage: split the metadata CSV 70/15/15. This call sits at module
# top level, so it runs whenever the file is executed or imported.
split_data(
    "/datasets/fairface/metadata_processed_bw.csv",
    train_size=0.7,
    val_size=0.15,
    test_size=0.15,
)
