#!/usr/bin/env python3
import argparse
import json
import os
import shutil
import sys
import tempfile
from datetime import datetime


def remove_keys_from_jsonl(input_path: str, keys_to_remove: list[str]) -> None:
    """Strip the given top-level keys from every JSON object in a JSONL file.

    The file is rewritten in place: output is first written to a temporary
    file in the same directory, the original is copied to a timestamped
    backup (``<name>.bak.<YYYYmmdd-HHMMSS>``), and the temporary file is then
    moved over the original with ``os.replace``.  The original file's
    permission bits are carried over to the rewritten file.  A one-line JSON
    summary of the run is printed to stdout on success.

    Line handling:
      * empty lines are preserved;
      * lines that are not valid JSON are copied through unchanged and
        counted under ``invalid_json_lines``;
      * valid JSON values that are not objects (arrays, strings, numbers,
        ``null``) have no keys to remove, so they are copied through
        unchanged and counted under ``lines_written``;
      * JSON objects are re-serialized on a single line after key removal.

    Args:
        input_path: Path to the JSONL file to rewrite.
        keys_to_remove: Top-level keys to delete from each JSON object.

    Exits:
        Calls ``sys.exit(1)`` (after printing to stderr) when ``input_path``
        is not an existing file or when any error occurs while rewriting.
    """
    if not os.path.isfile(input_path):
        print(f"Error: File not found: {input_path}", file=sys.stderr)
        sys.exit(1)

    # Prepare paths
    dir_name = os.path.dirname(input_path)
    base_name = os.path.basename(input_path)

    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    backup_path = os.path.join(dir_name, f"{base_name}.bak.{timestamp}")

    count_in = 0
    count_out = 0
    count_modified = 0
    errors = 0

    temp_path = None
    try:
        # Temp file lives next to the input so os.replace stays on one filesystem.
        fd, temp_path = tempfile.mkstemp(prefix=f".{base_name}.", dir=dir_name, text=True)
        os.close(fd)

        with open(input_path, "r", encoding="utf-8") as fin, open(temp_path, "w", encoding="utf-8") as fout:
            for line in fin:
                stripped = line.rstrip("\n")
                if not stripped:
                    # Preserve blank lines
                    fout.write("\n")
                    continue
                count_in += 1
                try:
                    obj = json.loads(stripped)
                except json.JSONDecodeError:
                    # If a line is not valid JSON, preserve it as-is
                    errors += 1
                    fout.write(line)
                    continue

                if not isinstance(obj, dict):
                    # Arrays/scalars carry no top-level keys; keep the line
                    # verbatim instead of failing on a missing .keys().
                    fout.write(line)
                    count_out += 1
                    continue

                removed_any = False
                for key in keys_to_remove:
                    if key in obj:
                        del obj[key]
                        removed_any = True

                if removed_any:
                    count_modified += 1

                # Re-serialize the object on a single line
                fout.write(json.dumps(obj, ensure_ascii=False) + "\n")
                count_out += 1

        # mkstemp creates the file with mode 0600; restore the original's
        # permission bits so the in-place rewrite does not change them.
        shutil.copymode(input_path, temp_path)

        # Backup and replace
        shutil.copy2(input_path, backup_path)
        os.replace(temp_path, input_path)

        print(
            json.dumps(
                {
                    "input": input_path,
                    "backup": backup_path,
                    "lines_read": count_in,
                    "lines_written": count_out,
                    "lines_modified": count_modified,
                    "invalid_json_lines": errors,
                },
                ensure_ascii=False,
            )
        )
    except Exception as e:
        # Best-effort cleanup of the temp file; it may already be gone
        # (e.g. after a successful os.replace), which is fine.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


def main() -> None:
    """Command-line entry point.

    Parses the positional JSONL path and the required ``--keys`` list (one
    or more key names), then hands both to ``remove_keys_from_jsonl``.
    """
    cli = argparse.ArgumentParser(
        description="Remove specified keys from each JSON object in a JSONL file.",
    )
    cli.add_argument("jsonl_path", help="Path to the .jsonl file")
    cli.add_argument("--keys", nargs="+", required=True, help="Keys to remove from each JSON object")

    options = cli.parse_args()
    target_file = options.jsonl_path
    unwanted_keys = options.keys

    remove_keys_from_jsonl(target_file, unwanted_keys)


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
