#!/usr/bin/env python3
import argparse
import json
import os
from prover.prover.lean.verifier import Lean4ServerScheduler

# ---------- Utilities ----------
def append_jsonl(path, obj):
    """Append *obj* to the file at *path* as one JSON document on its own line.

    The file is opened in append mode with UTF-8 encoding, so it is created
    when it does not exist yet. Non-ASCII characters are written verbatim
    rather than as ``\\uXXXX`` escapes.
    """
    with open(path, mode="a", encoding="utf-8") as sink:
        serialized = json.dumps(obj, ensure_ascii=False)
        sink.write(f"{serialized}\n")

def load_processed(path):
    """Load previously written JSONL records from *path*, keyed by ``"name"``.

    Returns an empty dict when *path* does not exist. Each non-blank line is
    parsed as JSON and stored under its ``"name"`` value; when several lines
    share a name, the last one wins.

    Loading is deliberately best-effort: lines that are not valid JSON (for
    example a line truncated by an interrupted run), that are not JSON
    objects, or that lack a usable ``"name"`` key are skipped.
    """
    processed = {}
    if not os.path.exists(path):
        return processed
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            try:
                obj = json.loads(line)
                processed[obj["name"]] = obj
            except (ValueError, KeyError, TypeError):
                # ValueError: malformed JSON (json.JSONDecodeError subclasses it).
                # KeyError: a JSON object without a "name" key.
                # TypeError: not a JSON object, or an unhashable "name" value.
                continue
    return processed

# ---------- Main ----------
def _terminate_partial_line(path):
    """Append a newline to *path* if it exists, is non-empty and lacks one.

    An interrupted run can leave a truncated last line without a trailing
    newline. Without this fix-up the next appended record would be glued onto
    that fragment and become unparseable as well.
    """
    if not os.path.exists(path) or os.path.getsize(path) == 0:
        return
    with open(path, "rb") as f:
        f.seek(-1, os.SEEK_END)
        ends_with_newline = f.read(1) == b"\n"
    if not ends_with_newline:
        with open(path, "a", encoding="utf-8") as f:
            f.write("\n")


def _read_jsonl_records(path):
    """Return every parseable JSON record in *path*, in file order.

    A missing file yields an empty list. Blank lines are ignored and
    malformed lines are reported on stdout and skipped, so one damaged line
    cannot abort the final merge after all verification work is done.
    """
    records = []
    if not os.path.exists(path):
        return records
    with open(path, "r", encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                records.append(json.loads(line))
            except ValueError:
                print(f"Skipping malformed line {line_number} in {path}")
    return records


def main():
    """Verify the Lean codes listed in ``--input_path`` and save the results.

    The input is a JSON file holding a list of objects with ``"name"`` and
    ``"code"`` keys. Results are appended batch by batch to
    ``intermediate_results.jsonl`` inside ``--output_path`` so an interrupted
    run can resume; names already present there are not submitted again.
    Once all batches are done, every intermediate record is written to
    ``code_compilation.json`` in the same directory.
    """
    from tqdm import tqdm

    parser = argparse.ArgumentParser()
    parser.add_argument("--input_path", required=True, type=str)
    parser.add_argument("--output_path", required=True, type=str)
    parser.add_argument("--cpu", default=32, type=int)
    parser.add_argument("--batch_size", default=256, type=int)
    args = parser.parse_args()
    if args.batch_size < 1:
        # range() rejects a zero step and a negative one would process nothing.
        parser.error("--batch_size must be a positive integer")

    # Load input codes
    with open(args.input_path, "r", encoding="utf-8") as f:
        codes = json.load(f)

    # Resume support: the intermediate file lives inside the output directory,
    # which must exist before the first append.
    os.makedirs(args.output_path, exist_ok=True)
    intermediate_path = os.path.join(args.output_path, "intermediate_results.jsonl")
    final_path = os.path.join(args.output_path, "code_compilation.json")
    _terminate_partial_line(intermediate_path)

    processed = load_processed(intermediate_path)
    remaining = [c for c in codes if c["name"] not in processed]

    print(f"Loaded {len(codes)} codes, {len(processed)} already processed, {len(remaining)} remaining")

    # Scheduler. NOTE(review): the units of `timeout` and `memory_limit` are
    # defined by Lean4ServerScheduler and are not visible from this file.
    lean4_scheduler = Lean4ServerScheduler(
        max_concurrent_requests=args.cpu,
        timeout=120,
        memory_limit=500,
        name="verifier"
    )
    try:
        # Submit one batch at a time and persist its results immediately, so
        # at most one batch of work is lost if the run is interrupted.
        batch_starts = range(0, len(remaining), args.batch_size)
        for start in tqdm(batch_starts, desc="Verifying Lean codes", unit="batch"):
            batch_codes = remaining[start:start + args.batch_size]
            request_ids = lean4_scheduler.submit_all_request([c["code"] for c in batch_codes])
            results = lean4_scheduler.get_all_request_outputs(request_ids)
            for code, result in zip(batch_codes, results):
                record = {
                    "name": code["name"],
                    "compilation_result": result["complete"],
                    "full_result": result
                }
                append_jsonl(intermediate_path, record)
    finally:
        # Always release the scheduler, even when a batch raises.
        lean4_scheduler.close()

    # Merge all results (processed + new) into final JSON
    final_results = _read_jsonl_records(intermediate_path)
    with open(final_path, "w", encoding="utf-8") as f:
        json.dump(final_results, f, indent=4, ensure_ascii=False)

    print(f"Verification finished. Results saved to {args.output_path}")


if __name__ == "__main__":
    main()
