import pandas as pd
import os
import librosa
import soundfile as sf
from pathlib import Path

def create_windows(audio, sr, window_duration=0.5):
    """Split audio into consecutive, non-overlapping windows of equal length.

    Args:
        audio: Sliceable sequence of samples (anything supporting ``len()``
            and slice indexing, e.g. a list or a 1-D array).
        sr: Sample rate in samples per second.
        window_duration: Length of each window in seconds.

    Returns:
        list: The complete windows, in order. Trailing samples that do not
        fill a whole window are dropped, so the list is empty when the audio
        is shorter than one window.

    Raises:
        ValueError: If ``window_duration * sr`` truncates to less than one
            sample, which would make the window length unusable.
    """
    window_length = int(window_duration * sr)

    # A zero length would otherwise surface as an opaque ZeroDivisionError,
    # and a negative one would silently produce no windows at all.
    if window_length <= 0:
        raise ValueError(
            f"window_duration={window_duration!r} at sr={sr!r} gives a "
            f"window of {window_length} samples; at least 1 is required"
        )

    # Only complete windows are kept.
    num_windows = len(audio) // window_length

    return [
        audio[i * window_length:(i + 1) * window_length]
        for i in range(num_windows)
    ]

def _parse_window_index(window_name):
    """Return the zero-based window index encoded in *window_name*, or None.

    The index is the integer found between the last ``_window`` marker and
    the next dot (e.g. ``clip_window12.wav`` -> ``12``). ``None`` is returned
    for non-string values, names without the marker, non-integer text, and
    negative numbers.
    """
    if not isinstance(window_name, str):
        return None

    # Use the LAST marker so a stem that itself contains "_window" still works.
    _, marker, tail = window_name.rpartition('_window')
    if not marker:
        return None

    try:
        index = int(tail.split('.')[0])
    except ValueError:
        return None

    # A negative index would silently address windows from the end.
    return index if index >= 0 else None


def process_wav_files(csv_path, output_dir, window_duration=0.5):
    """
    Process WAV files based on CSV entries and create window files

    The CSV must provide an 'original_path' column (source WAV file) and a
    'name' column (output file name carrying the window number after
    '_window'). Each source file is loaded once, split into windows, and only
    the windows listed in the CSV are written to output_dir under their 'name'.

    Source files that do not exist, names whose window number cannot be
    parsed, and window numbers beyond the end of the audio are reported with
    a printed warning and skipped; they do not abort the run.

    Args:
        csv_path (str): Path to the CSV file containing window information
        output_dir (str): Directory to save the window WAV files
        window_duration (float): Length of each window in seconds
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Read the CSV file
    df = pd.read_csv(csv_path)

    # Group by original WAV file path so each source is loaded only once
    for original_path, group in df.groupby('original_path'):
        if not os.path.exists(original_path):
            print(f"Warning: Original WAV file not found: {original_path}")
            continue

        # Parse every requested window number before touching the audio, so a
        # malformed row is reported instead of crashing the whole run.
        requested = []
        for window_name in group['name']:
            window_num = _parse_window_index(window_name)
            if window_num is None:
                print(f"Warning: Could not parse window number from name: {window_name}")
                continue
            requested.append((window_name, window_num))

        # Nothing usable for this file: skip the decode entirely.
        if not requested:
            continue

        # Load the original WAV file; sr=None asks librosa for the file's own rate
        audio, sr = librosa.load(original_path, sr=None)

        # Create windows
        windows = create_windows(audio, sr, window_duration)

        # Save only the windows that are in the CSV
        for window_name, window_num in requested:
            if window_num < len(windows):
                output_path = os.path.join(output_dir, window_name)
                sf.write(output_path, windows[window_num], sr)
                print(f"Created: {output_path}")
            else:
                print(f"Warning: Window {window_num + 1} not found in {original_path}")

if __name__ == "__main__":
    # Each job is (CSV listing the windows, directory receiving the window
    # files, window length in seconds); jobs run in the order listed.
    jobs = (
        # detection with whistle classification task
        ("detection/dolphin_reef_watkins/all.csv", "detection/dolphin_reef_watkins", 0.5),
        # binary detection task
        ("detection/binary/all.csv", "detection/binary", 0.3),
    )

    for csv_path, output_dir, window_duration in jobs:
        process_wav_files(csv_path, output_dir, window_duration=window_duration)