import pandas as pd
import samplerate 
import h5py
import numpy as np
import os

# +
# Resample the audio track of every video referenced by the per-participant
# split pickles and store each result as a standalone .npy file under
# ./EPIC_KITCHENS/audio/<split>/<participant>/<video_id>.npy.
Q = 16000
P = 24000
# Conversion ratio handed to samplerate.resample; constant for every video,
# so it is computed once here instead of inside the innermost loop.
# NOTE(review): presumably P is the sample rate of the audio stored in the
# HDF5 file and Q the desired output rate -- confirm against the dataset.
resample_ratio = Q / P

# Open the HDF5 archive in a context manager so the handle is released even
# if a pickle is missing or resampling fails part-way through.
# NOTE: audio_dataset is closed once this block finishes.
with h5py.File('./EPIC_audio.hdf5', 'r') as audio_dataset:
    for split in ['train', 'test']:
        for participant in ['P01', 'P02', 'P04', 'P22', 'P30']:
            annotations = pd.read_pickle(f'./{participant}_{split}.pkl')
            # Each video is processed once, however many rows reference it.
            video_ids = np.unique(annotations['video_id'])

            out_dir = f'./EPIC_KITCHENS/audio/{split}/{participant}'
            # exist_ok avoids the check-then-create race of
            # os.path.exists() followed by os.makedirs().
            os.makedirs(out_dir, exist_ok=True)

            for video_id in video_ids:
                # [()] reads the entire dataset into memory as an array.
                samples = audio_dataset[video_id][()]
                resampled = samplerate.resample(
                    samples, resample_ratio, 'sinc_best'
                )

                np.save(f'{out_dir}/{video_id}.npy', resampled)

# -

