"""Convert puzzle ``generations_*.jsonl`` files to ``samples_*.jsonl`` (lm_eval format).

Mirrors the role of ``dsr_to_jsonl.py`` for the case-where-we-have-puzzle-eval-rollouts-
in-custom-format scenario. Lets the existing exploration pipeline ingest puzzle
eval rollouts that were produced by ``tools/eval_lora_checkpoints.py`` (one trace
per JSON line, keys ``idx``/``response``/``ground_truth``/...) instead of by lm_eval
directly.

Usage:
    python -m analysis.exploration.generations_to_samples \\
        --src /tmp/v90_extra/gspo_bridges_10x10dh.jsonl \\
        --task bridges_10x10dh \\
        --out_dir results/v90_puzzles_extra/gspo_v2_s20/

Each output line has ``doc_id``, ``target``, ``resps=[[response]]``,
``filtered_resps=[response]``, and ``doc``.
"""
import argparse
import json
from datetime import datetime
from pathlib import Path


def convert(src: Path, task: str, out_dir: Path) -> Path:
    """Rewrite one ``generations_*.jsonl`` file as an lm_eval-style samples file.

    Every non-blank line of ``src`` is parsed as a JSON object and re-emitted
    as one record carrying ``doc_id``, ``target``, ``resps``,
    ``filtered_resps`` and ``doc``. The result is written to
    ``out_dir/anon-neurips26__{task}_test200/samples_{task}_test200_{ts}.jsonl``
    where ``ts`` is the local time at which this function was called.

    Args:
        src: Input JSONL file, one trace per line. Blank lines are skipped.
        task: Short task name embedded in the output directory and file name.
        out_dir: Root output directory; created (with parents) if missing.

    Returns:
        Path of the samples file that was written.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    ts = datetime.now().strftime("%Y-%m-%dT%H-%M-%S.%f")
    # Use a directory name matching the task pattern picked up by _extract_task_name.
    task_subdir = out_dir / f"anon-neurips26__{task}_test200"
    # parents=True also creates out_dir itself when it does not exist yet.
    task_subdir.mkdir(parents=True, exist_ok=True)
    out_path = task_subdir / f"samples_{task}_test200_{ts}.jsonl"

    n = 0  # number of records written so far; doubles as the fallback doc_id
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(src, encoding="utf-8") as fin, \
            open(out_path, "w", encoding="utf-8") as fout:
        for line in fin:
            # Tolerate blank / whitespace-only lines instead of crashing in
            # json.loads on an empty document.
            if not line.strip():
                continue
            d = json.loads(line)
            # A missing *or* null ``idx`` falls back to the running index.
            idx = d.get("idx")
            doc_id = n if idx is None else int(idx)
            response = d.get("response", "")
            target = d.get("ground_truth", "")
            problem = d.get("problem", "")
            rec = {
                "doc_id": doc_id,
                "target": target,
                "resps": [[response]],
                "filtered_resps": [response],
                "doc": {
                    "problem": problem,
                    # The ground truth is mirrored under several keys.
                    "solution": target,
                    "answer": target,
                    "ground_truth": target,
                    "exact_match": d.get("exact_match"),
                    "partial_score": d.get("partial_score"),
                },
            }
            fout.write(json.dumps(rec) + "\n")
            n += 1
    return out_path


def main(argv=None):
    """Command-line entry point: convert one generations file and report the count.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``.

    Prints one summary line with the task name, the number of lines in the
    written file, and the output path.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--src", required=True, help="Input generations_*.jsonl file")
    p.add_argument("--task", required=True,
                   help="Short task name, e.g. bridges_10x10dh, undead_5x5dm, pattern_7x7dm")
    p.add_argument("--out_dir", required=True,
                   help="Output directory (e.g. results/v90_puzzles_extra/gspo_v2_s20/)")
    args = p.parse_args(argv)
    out = convert(Path(args.src), args.task, Path(args.out_dir))
    # Count lines from the file on disk; the context manager guarantees the
    # handle is closed rather than left for the garbage collector.
    with open(out, encoding="utf-8") as fh:
        n_lines = sum(1 for _ in fh)
    print(f"{args.task}: {n_lines} traces → {out}")


# Run the CLI only when this module is executed as a script (including via
# ``python -m``), not when it is imported.
if __name__ == "__main__":
    main()
