import argparse
from features import build_dataset

def _build_parser():
    """Create the argument parser for the dataset-building command line."""
    parser = argparse.ArgumentParser(description="Build clustered probability features for LLM diagnostics")

    parser.add_argument("root_dir", nargs="?", default="data/full-trace-dataset")

    parser.add_argument("--cluster", choices=["none", "kmeans", "gmm"], default="gmm")

    parser.add_argument("--k-vertical", type=int, default=3)
    parser.add_argument("--k-horizontal", type=int, default=2)

    parser.add_argument("--metric", choices=["inc", "self"], default="self")

    # A "--no-*" switch has to CLEAR its destination. With store_true on a
    # destination that already defaults to True the switch would do nothing,
    # and on one that defaults to False it would do the opposite of its name.
    parser.add_argument(
        "--no-log-transform",
        dest="log_transform",
        action="store_false",
        help="pass log_transform=False to build_dataset (default: True)",
    )
    parser.add_argument(
        "--rebuild-cache",
        dest="rebuild_cache",
        action="store_true",
        help="pass rebuild_cache=True to build_dataset (default: False)",
    )
    parser.add_argument(
        "--no-rebuild-cache",
        dest="rebuild_cache",
        action="store_false",
        help="pass rebuild_cache=False to build_dataset (this is the default)",
    )
    parser.set_defaults(log_transform=True, rebuild_cache=False)

    parser.add_argument("--labels", dest="write_labels", action="store_true")

    # NOTE(review): no default is given, so output_style=None is forwarded to
    # build_dataset when the option is omitted -- confirm that None is accepted.
    parser.add_argument("--output-style", choices=["per_step", "per_task"])

    parser.add_argument("--center-alpha-vertical", type=float, default=0.85)
    parser.add_argument("--center-alpha-horizontal", type=float, default=0.65)

    return parser


def main(argv=None):
    """Parse command-line options, call ``build_dataset`` and list its outputs.

    The effective settings are echoed to stdout before ``build_dataset`` is
    called; afterwards every item of its return value is printed on its own
    line.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.
    """
    args = _build_parser().parse_args(argv)

    print(f"[INFO] root_dir={args.root_dir}")
    print(f"[INFO] cluster={args.cluster}, k_vertical={args.k_vertical}, k_horizontal={args.k_horizontal}")
    print(f"[INFO] metric={args.metric}, log_transform={args.log_transform}, rebuild_cache={args.rebuild_cache}")
    print(f"[INFO] center_alpha_vertical={args.center_alpha_vertical}, center_alpha_horizontal={args.center_alpha_horizontal}")
    print(f"[INFO] labels={args.write_labels}, output_style={args.output_style}")

    outs = build_dataset(
        args.root_dir,
        cluster_method=args.cluster,
        k_vertical=args.k_vertical,
        k_horizontal=args.k_horizontal,
        metric=args.metric,
        log_transform=args.log_transform,
        rebuild_cache=args.rebuild_cache,
        write_labels=args.write_labels,
        output_style=args.output_style,
        include_labels_in_meta=True,
        center_mass_alpha_v=args.center_alpha_vertical,
        center_mass_alpha_h=args.center_alpha_horizontal,
    )

    print("[INFO] Generated task meta files:")
    for p in outs:
        print(" -", p)

# Run the command-line entry point only when this file is executed as a
# script; importing the module does not trigger it.
if __name__ == "__main__":
    main()
