import pyarrow as pa
import pyarrow.parquet as pq
import pandas as pd
import numpy as np

# --- Load ETTm1 ---
# Read the raw CSV into a wide frame: one column per variable.
csv_path = "/PATH/ETTm1.csv"
df = pd.read_csv(csv_path)

# The timestamp column (named 'date' in ETTm1) is not a series; remove it
# when present and carry on untouched when it is absent.
df = df.drop(columns=["date"], errors="ignore")

# Force every remaining column to a numeric dtype. Cells that cannot be
# parsed become NaN (errors="coerce") instead of raising.
df = df.apply(pd.to_numeric, errors="coerce")

# --- Reshape to long format ---
# Each column of the wide frame becomes its own series, stored as one
# (series_id, time_idx, value) row per observation.
rows = []
for name, column in df.items():
    # NaNs are dropped before indexing, so time_idx numbers the surviving
    # observations 0..n-1 rather than their original row positions.
    observed = column.dropna()
    values = observed.astype(np.float32).to_numpy()

    # Only keep series with at least 8 observations.
    if len(values) >= 8:
        rows.append(
            pd.DataFrame(
                {
                    "series_id": str(name),
                    "time_idx": np.arange(len(values), dtype=np.int64),
                    "value": values,
                }
            )
        )

if rows:
    out_df = pd.concat(rows, ignore_index=True)
else:
    # No column qualified: fall back to an empty frame with the same three
    # columns so the write step below still has something to serialize.
    out_df = pd.DataFrame(
        {"series_id": pd.Series(dtype="string"),
         "time_idx": pd.Series(dtype="int64"),
         "value": pd.Series(dtype="float32")}
    )

# Write Parquet
# Convert the long-format frame to an Arrow table, leaving the pandas index
# out of the file (preserve_index=False), and write it to disk.
out_path = "/PATH/ett_timeseries.parquet"
table = pa.Table.from_pandas(out_df, preserve_index=False)
pq.write_table(table, out_path)

# Report where the file went and how much data it holds.
n_series = out_df["series_id"].nunique()
n_rows = len(out_df)
print(f"Saved ETTm1 in (series_id, time_idx, value) format to {out_path}")
print(f"#series: {n_series} | rows: {n_rows:,}")
