import numpy as np
import os
# Root directory for the data files, read from the environment. A single
# trailing slash is dropped so the joins below do not double it. When the
# variable is unset the paths are rooted at "/data/...".
BASE_PATH = os.environ.get("BASE_PATH", "")
if BASE_PATH.endswith('/'):
    BASE_PATH = BASE_PATH[:-1]

# All three archives live in the same directory and share one file stem;
# only the suffix differs (source, low-precision copy, uncompressed test copy).
_dataset_stem = f"{BASE_PATH}/data/corelogic/loan_data_60000_unemployment_curr_balance_cir_spi"
input_path = f"{_dataset_stem}.npz"
output_path = f"{_dataset_stem}_lp.npz"
test_path = f"{_dataset_stem}_test.npz"
# Load dataset, shrink its floating-point arrays, and write a compressed copy.
def _downcast_floats(array):
    """Return *array* cast to the smallest float type that holds its values.

    float16 is preferred, but its largest finite value is 65504; casting a
    bigger magnitude to float16 silently produces ``inf``. When the largest
    finite magnitude in *array* does not fit in float16, float32 is used
    instead, and if it does not fit there either the array is returned
    unchanged. NaN and infinite entries are ignored when measuring the range.

    Note that float16 keeps only about three significant decimal digits, so
    the down-cast is lossy even when no overflow occurs.
    """
    finite = array[np.isfinite(array)]
    if finite.size == 0:
        # Empty or entirely NaN/inf: nothing can overflow.
        return array.astype(np.float16)
    peak = np.abs(finite).max()
    for target in (np.float16, np.float32):
        if peak <= np.finfo(target).max:
            return array.astype(target)
    return array


# NOTE(security): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code. Only use this on archives from a trusted source.
optimized_data = {}
with np.load(input_path, allow_pickle=True) as data:
    # The archive is read lazily, so every array must be materialised before
    # the context manager closes the underlying file.
    for key in data.files:
        array = data[key]
        if np.issubdtype(array.dtype, np.floating):
            optimized_data[key] = _downcast_floats(array)
        else:
            optimized_data[key] = array  # Keep other data types unchanged

# Save in a compressed format
np.savez_compressed(output_path, **optimized_data)

# Report the new file path
print(f"Saved to {output_path}")
import os

# Measure the source archive and the converted archive on disk, expressed in
# decimal gigabytes (bytes / 1e9), then report both figures.
original_size_gb, new_size_gb = (
    os.stat(archive).st_size / 1e9 for archive in (input_path, output_path)
)

print(f"Original size: {original_size_gb} GB")
print(f"New size: {new_size_gb} GB")

import numpy as np

# Load new dataset
# NOTE(security): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code. Only use this on archives from a trusted source.
loaded_arrays = {}
with np.load(output_path, allow_pickle=True) as new_data:
    # Print summary of shapes and data types. Each member is read exactly
    # once: indexing the archive re-reads and decompresses the array on every
    # access, so the result is kept and reused for the re-save below.
    for key in new_data.files:
        array = new_data[key]
        loaded_arrays[key] = array
        print(f"{key}: shape={array.shape}, dtype={array.dtype}, size={array.nbytes / 1e6:.2f} MB")

# Save again, but **without** compression
np.savez(test_path, **loaded_arrays)

import os
# Report how large the uncompressed copy is on disk, in decimal gigabytes.
test_size_bytes = os.stat(test_path).st_size
test_size_gb = test_size_bytes / 1e9
print(f"Test uncompressed size: {test_size_gb} GB")
