#!/usr/bin/env python3
import os
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# --------------------
# Hardcoded paths & knobs
# --------------------
INPUT_TXT  = "/PATH/exchange_rate.txt"
OUT_PARQUET = "/PATH/exchange_rate_timeseries.parquet"

# Output schema, shared by the empty-frame fallback and the final cast so the
# two can never drift apart.
_OUT_DTYPES = {"series_id": "string", "time_idx": "int64", "value": "float32"}


# --------------------
# Load text file (no header)
# --------------------
def load_wide(path: str) -> pd.DataFrame:
    """Read the header-less text file at *path* into an all-numeric frame.

    The file is parsed with ``pd.read_csv`` defaults (comma-separated) and
    ``header=None``, so columns are labelled by integer position. Any cell
    that is not parseable as a number is coerced to NaN; NaNs are dropped
    per column later, in :func:`to_long_format`.
    """
    wide = pd.read_csv(path, header=None)
    return wide.apply(pd.to_numeric, errors="coerce")


# --------------------
# Build long-format (series_id, time_idx, value)
# --------------------
def to_long_format(wide: pd.DataFrame) -> pd.DataFrame:
    """Stack the columns of *wide* into ``(series_id, time_idx, value)`` rows.

    Each column becomes one series named ``col_<column label>``. NaNs are
    dropped first and ``time_idx`` then counts the surviving values
    ``0..n-1``, so every series is contiguous.

    Columns left with no values are skipped: they contribute no rows, and
    skipping them avoids handing empty frames to ``pd.concat``. If nothing
    survives, an empty frame with the correct schema is returned.

    Returns:
        A frame with dtypes ``string`` / ``int64`` / ``float32``.
    """
    # NOTE(review): because NaNs are dropped before indexing, a NaN in the
    # middle of a column shifts later time_idx values relative to the source
    # row number -- confirm this is what the downstream loader expects.
    frames = []
    for col in wide.columns:
        values = wide[col].dropna().astype(np.float32).to_numpy()
        if values.size == 0:
            continue
        frames.append(
            pd.DataFrame(
                {
                    "series_id": f"col_{col}",  # name by column index
                    "time_idx": np.arange(values.size, dtype=np.int64),
                    "value": values,
                }
            )
        )

    if frames:
        long_df = pd.concat(frames, ignore_index=True)
    else:
        long_df = pd.DataFrame(
            {name: pd.Series(dtype=dtype) for name, dtype in _OUT_DTYPES.items()}
        )

    # Enforce dtypes expected by the loader
    return long_df.astype(_OUT_DTYPES)


# --------------------
# Write Parquet
# --------------------
def write_parquet(long_df: pd.DataFrame, out_path: str) -> None:
    """Write *long_df* to *out_path* as Parquet, creating parent directories.

    The pandas index is not stored (``preserve_index=False``).
    """
    out_dir = os.path.dirname(out_path)
    # dirname() is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError -- only create a directory when there is one to create.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    table = pa.Table.from_pandas(long_df, preserve_index=False)
    pq.write_table(table, out_path)


def main() -> None:
    """Convert INPUT_TXT to long format, write OUT_PARQUET, print a summary."""
    out_df = to_long_format(load_wide(INPUT_TXT))
    write_parquet(out_df, OUT_PARQUET)
    print(f"[OK] Wrote {len(out_df):,} rows across {out_df['series_id'].nunique()} series → {OUT_PARQUET}")


if __name__ == "__main__":
    main()
