import pandas as pd
import os 
# Directory that holds the input and output CSV files; it has to be provided
# through the environment, so fail immediately when the variable is missing.
try:
    CORELOGIC_DATA_PATH = os.environ["CORELOGIC_DATA_PATH"]
except KeyError:
    raise ValueError("CORELOGIC_DATA_PATH is not set") from None

# Input and output locations, both derived from the number of top zip codes
# encoded in the file names.
nr_zips = 4
_input_name = "filtered_origination_data_top_{}_zips.csv".format(nr_zips)
_output_name = "top_{}_zips_loan_ids.csv".format(nr_zips)
filtered_input_file = os.path.join(CORELOGIC_DATA_PATH, _input_name)
output_file = os.path.join(CORELOGIC_DATA_PATH, _output_name)

# Stream the filtered dataset in chunks so large files never have to fit in
# memory at once.
chunk_size = 10**6  # Rows per chunk

# The output is opened once in write mode so that a re-run replaces the file
# instead of appending to the previous run's rows. `newline=""` is what pandas
# asks for when it is handed an already-open text handle.
with pd.read_csv(
    filtered_input_file, usecols=["loan_id"], chunksize=chunk_size
) as reader, open(output_file, "w", newline="", encoding="utf-8") as out_handle:
    for chunk_index, chunk in enumerate(reader):
        # Only the first chunk carries the header; writing it for every chunk
        # would scatter "loan_id" header lines through the middle of the data.
        chunk.to_csv(out_handle, index=False, header=(chunk_index == 0))

print(f"New file '{output_file}' containing only 'loan_id' has been created.")