from pathlib import Path
import pandas as pd
import numpy as np

from script.utils.splits import make_nested_split_plan  

def dump_outer_csv_splits(csv_path: Path, work_dir: Path, keep_header: bool = True):
    """Write the outer train/test partitions of a CSV into per-fold directories.

    The CSV at ``csv_path`` is loaded with ``pd.read_csv`` and its row count is
    handed to ``make_nested_split_plan``. For the k-th entry of the returned
    plan, the pair stored under ``"outer"`` is used as positional row indices
    (train first, test second) and the following files are written to
    ``work_dir / "splits" / "outer_<k>"``:

    * ``train.csv`` / ``test.csv`` -- the selected rows with a fresh 0-based
      index that is not written to disk.
    * ``train_indices.npy`` / ``test_indices.npy`` -- the index collections
      exactly as supplied by the plan.

    Args:
        csv_path: CSV file to split.
        work_dir: Directory under which the ``splits`` tree is created.
        keep_header: Forwarded to ``DataFrame.to_csv(header=...)`` for both
            output CSVs.

    Returns:
        The fold directories, in plan order.
    """
    # NOTE(review): the plan layout (a sequence of mappings with an "outer"
    # pair of index collections) is inferred from how it is consumed here --
    # confirm against script.utils.splits.
    frame = pd.read_csv(csv_path)
    split_plan = make_nested_split_plan(len(frame))

    base_dir = work_dir / "splits"
    base_dir.mkdir(parents=True, exist_ok=True)

    created = []
    for fold_no, entry in enumerate(split_plan):
        train_rows, test_rows = entry["outer"]

        # Select both partitions before touching the filesystem for this fold.
        train_part = frame.iloc[train_rows].reset_index(drop=True)
        test_part = frame.iloc[test_rows].reset_index(drop=True)

        target = base_dir / f"outer_{fold_no}"
        target.mkdir(parents=True, exist_ok=True)
        created.append(target)

        csv_outputs = (("train.csv", train_part), ("test.csv", test_part))
        for file_name, part in csv_outputs:
            part.to_csv(target / file_name, index=False, header=keep_header)

        index_outputs = (
            ("train_indices.npy", train_rows),
            ("test_indices.npy", test_rows),
        )
        for file_name, rows in index_outputs:
            np.save(target / file_name, rows)

    return created


from pathlib import Path

# Batch driver: for every dataset directory directly under ./data, dump the
# outer splits of its "*_continuous.csv" file into that same directory.
root = Path("./data")
# iterdir() yields children in arbitrary order; sort so that the processing
# order (and therefore the log output) is reproducible between runs.
for dataset_dir in sorted(root.iterdir()):
    if not dataset_dir.is_dir():
        continue

    # glob() order is filesystem-dependent as well; sorting makes the choice
    # of csv_files[0] deterministic when several files match the pattern.
    csv_files = sorted(dataset_dir.glob("*_continuous.csv"))
    if not csv_files:
        continue

    csv_path = csv_files[0]
    if len(csv_files) > 1:
        # Only one file per dataset is processed; say so instead of silently
        # dropping the remaining matches.
        ignored = ", ".join(p.name for p in csv_files[1:])
        print(f"Warning: multiple *_continuous.csv files in {dataset_dir}; ignoring {ignored}")
    print(f"Processing {csv_path} ...")

    dump_outer_csv_splits(csv_path, dataset_dir)
