import os
import io
import numpy as np
import pyarrow.parquet as pq
import torch



def get_data(path: str) -> torch.Tensor:
    """Load the 'u' column of a parquet file as a single float32 tensor.

    Every entry of the 'u' column is deserialized with ``np.load`` from its
    raw bytes, the resulting arrays are stacked along a new leading axis
    (one slice per table row), and the stack is cast to float32.

    Args:
        path: Location of the parquet data to read.

    Returns:
        A float32 tensor whose first dimension indexes the rows of the
        'u' column.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: Propagated from ``np.stack`` when the column is empty
            or its arrays do not all share the same shape.
    """
    # Fail early with a clear message instead of a lower-level reader error.
    if not os.path.exists(path):
        raise FileNotFoundError(f"Data file not found: {path}.")

    column = pq.read_table(path)['u']

    # Each cell holds one serialized array; decode it from an in-memory buffer.
    arrays = [np.load(io.BytesIO(cell.as_buffer())) for cell in column]

    # astype() returns a new array, so nothing else references this buffer.
    stacked = np.stack(arrays).astype(np.float32)

    # from_numpy wraps the existing buffer instead of copying it again
    # (torch.tensor always copies), which halves peak memory here.
    return torch.from_numpy(stacked)


# Location of the parquet data to load (left empty here; must be filled in).
path = ''
data = get_data(path)
print(data.shape)

# Keep every 4th entry along dimension 1.
index = list(range(0, data.shape[1], 4))
data = data[:, index, :]
print(data.shape)

# Largest absolute difference between consecutive entries along dimension 1,
# floored at 1e-8 so the value can never be zero if later used as a divisor.
scale = max((data[:, 1:] - data[:, :-1]).abs().max().item(), 1e-8)
print(scale)

