#!/usr/bin/env python3
"""
Recursively find files whose name contains 'violations_flash_gpt5'
(the value of NAME_SUBSTR) and increment the top-level JSON field
`step_index` by 1.

- Supports .json (dict or list) and .jsonl (one JSON object per line)
- Modifies files IN PLACE
- Creates a .bak backup next to each modified file
- No command-line arguments: run from the directory you want as the root
"""

import json
import os
from pathlib import Path
from typing import Any, Dict, List


# Substring a file's name must contain for the file to be selected by main().
NAME_SUBSTR = "violations_flash_gpt5"
# File suffixes eligible for processing; main() compares them lowercased.
EXTS = {".json", ".jsonl"}


def bump_step_index_in_record(rec: Dict[str, Any]) -> bool:
    """Increment ``rec['step_index']`` by 1 in place when it holds an integer.

    Args:
        rec: A decoded JSON object; it is mutated in place on success.

    Returns:
        True if ``rec['step_index']`` was incremented. False if the key is
        missing or its value is neither an int nor a string consisting only
        of decimal digits (surrounding whitespace is ignored).

    Note:
        A digit string such as ``"7"`` is replaced by the int ``8``; the
        stored value's type changes from str to int.
    """
    if "step_index" not in rec:
        return False

    value = rec["step_index"]

    # bool is a subclass of int; leave True/False untouched.
    if isinstance(value, bool):
        return False

    if isinstance(value, int):
        rec["step_index"] = value + 1
        return True

    if isinstance(value, str):
        text = value.strip()
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits ("²") that int() rejects with ValueError.
        if text.isdecimal():
            rec["step_index"] = int(text) + 1
            return True

    return False


def process_json(path: Path) -> int:
    """Bump ``step_index`` in a .json file, rewriting it only if something changed.

    The document may be a single object or a list; in a list only dict
    elements are considered. When at least one record is updated, the file's
    current bytes are copied to ``<file>.bak`` (e.g. ``x.json.bak``) and the
    file is rewritten with 2-space indentation and a trailing newline.

    Args:
        path: Path of the .json file to process.

    Returns:
        The number of records whose ``step_index`` was incremented.
    """
    document = json.loads(path.read_text(encoding="utf-8"))

    # Collect the records eligible for a bump, in document order.
    if isinstance(document, list):
        candidates = [entry for entry in document if isinstance(entry, dict)]
    elif isinstance(document, dict):
        candidates = [document]
    else:
        candidates = []

    updated = 0
    for record in candidates:
        if bump_step_index_in_record(record):
            updated += 1

    if not updated:
        return 0

    # Snapshot the on-disk bytes before overwriting the file.
    backup_path = path.with_suffix(path.suffix + ".bak")
    backup_path.write_bytes(path.read_bytes())

    serialized = json.dumps(document, ensure_ascii=False, indent=2)
    path.write_text(serialized + "\n", encoding="utf-8")
    return updated


def process_jsonl(path: Path) -> int:
    """Bump ``step_index`` on each JSON-object line of a .jsonl file.

    Every non-blank line is parsed as JSON; lines that decode to a dict are
    passed to ``bump_step_index_in_record``. If at least one line was
    updated, the file's current bytes are copied to ``<file>.bak`` and the
    file is rewritten: non-blank lines are re-serialized with ``json.dumps``
    (one per line), blank or whitespace-only lines are kept as read, and the
    output ends with a newline. If nothing was updated the file is left
    untouched.

    Args:
        path: Path of the .jsonl file to process.

    Returns:
        The number of lines whose ``step_index`` was incremented.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message includes the 1-based line number within the file.
            Nothing is written in that case, because writing only happens
            after every line has been parsed.
    """
    changes = 0
    out_lines: List[str] = []

    with path.open("r", encoding="utf-8") as f:
        for lineno, line in enumerate(f, start=1):
            raw = line.rstrip("\n")
            if not raw.strip():
                # Keep blank / whitespace-only lines exactly as they were.
                out_lines.append(raw)
                continue

            try:
                obj = json.loads(raw)
            except json.JSONDecodeError as err:
                # Each line is parsed on its own, so json always reports
                # "line 1"; add the file line so the error can be located.
                raise json.JSONDecodeError(
                    f"{err.msg} (file line {lineno})", err.doc, err.pos
                ) from err

            if isinstance(obj, dict) and bump_step_index_in_record(obj):
                changes += 1

            out_lines.append(json.dumps(obj, ensure_ascii=False))

    if changes > 0:
        # Snapshot the on-disk bytes before overwriting the file.
        backup = path.with_suffix(path.suffix + ".bak")
        backup.write_bytes(path.read_bytes())
        path.write_text("\n".join(out_lines) + "\n", encoding="utf-8")

    return changes


def main():
    """Process every matching file under the current directory and print a summary.

    Walks the resolved current working directory, selects files whose name
    contains ``NAME_SUBSTR`` and whose lowercased suffix is in ``EXTS``, and
    runs the .json or .jsonl processor on each in sorted order. An exception
    raised while handling one file is printed and does not stop the run.
    """
    root = Path(".").resolve()

    candidates: List[Path] = []
    for dirpath, _dirnames, filenames in os.walk(root):
        folder = Path(dirpath)
        for name in filenames:
            candidate = folder / name
            if NAME_SUBSTR in name and candidate.suffix.lower() in EXTS:
                candidates.append(candidate)

    if not candidates:
        print(f"No files found with name containing '{NAME_SUBSTR}' under {root}")
        return

    files_modified = 0
    increments = 0

    for path in sorted(candidates):
        try:
            # Pick the processor by (case-insensitive) file suffix.
            is_jsonl = path.suffix.lower() == ".jsonl"
            handler = process_jsonl if is_jsonl else process_json
            changes = handler(path)

            if changes > 0:
                files_modified += 1
                increments += changes
                print(f"[{changes:>4}] updated: {path}")

        except Exception as e:
            # Report the failure and move on to the next file.
            print(f"[ERR ] {path} -> {e}")

    print("----")
    print(f"Files modified:              {files_modified}")
    print(f"Total step_index increments: {increments}")
    print("Backups written as:          *.bak")

# Run the batch update only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
