#!/bin/env python3

import json
import os
import sys
from collections import deque

def prompt_dep(path):
    """Render one file as a Markdown-fenced prompt fragment.

    Args:
        path: Path of the file to embed in the prompt.

    Returns:
        A string made of a header line naming the file's basename, followed
        by the file's text inside an ``isabelle`` code fence, and a final
        newline.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    file_name = os.path.basename(path)
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of the machine running the script.
    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()
    # The closing fence must start on its own line; add the line break when
    # the file does not end with one (an empty file needs none).
    if content and not content.endswith('\n'):
        content += '\n'
    return f"File `{file_name}`:\n```isabelle\n{content}```\n"

def traverse_thy_files(root_dir):
    """Collect ``.thy`` files under *root_dir* in breadth-first order.

    Directories are visited level by level. Within one directory the entries
    are processed in sorted name order, and a directory's own ``.thy`` files
    are listed before the files of any of its subdirectories.

    Args:
        root_dir: Directory at which the traversal starts.

    Returns:
        A list of paths (each built by joining the visited directory with the
        entry name) of all regular files whose name ends in ``.thy``. The
        list is empty when *root_dir* cannot be listed.
    """
    # A deque gives O(1) pops from the front; list.pop(0) shifts every item.
    pending = deque([root_dir])
    thy_files = []
    while pending:
        current_dir = pending.popleft()
        try:
            entries = sorted(os.listdir(current_dir))
        except OSError:
            # Best effort: a missing, unreadable or non-directory path is
            # skipped instead of aborting the whole traversal.
            continue
        for entry in entries:
            full_path = os.path.join(current_dir, entry)
            if os.path.isdir(full_path):
                # Subdirectories are queued, so they are visited only after
                # every directory of the current level.
                pending.append(full_path)
            elif entry.endswith('.thy') and os.path.isfile(full_path):
                thy_files.append(full_path)
    return thy_files

# Context shared by every prompt: the task instruction, the NTP4Verif theory,
# and then every .thy file found under ./generation/isabelle.
PRELUDES = [
    "Given the following Isabelle theories as context, prove the Isabelle proposition given at the end.\n",
    prompt_dep('./lib/Isabelle/NTP4Verif/NTP4Verif.thy'),
]
PRELUDES.extend(
    prompt_dep(thy_path)
    for thy_path in traverse_thy_files('./generation/isabelle')
)

# The fragments above joined into the single string that opens each prompt.
PRELUDE = '\n'.join(PRELUDES)


# Cache of rendered library prompts, keyed by the directory they were built
# from, so each directory is traversed and read at most once.
LIBS = {}

def prompt_lib(path):
    """Return the library context prompt for the theory file at *path*.

    If ``<three directory levels above path>/lib/isabelle`` exists, the
    context is every ``.thy`` file found under it. Otherwise the context is
    every ``.thy`` file found under the directory containing *path*, except
    files whose path ends in ``qtvc.thy``.

    Args:
        path: Path of the theory file a prompt is being built for.

    Returns:
        The ``prompt_dep`` fragments of the selected files joined by
        newlines. The result is stored in ``LIBS`` and reused for later
        calls that resolve to the same directory.
    """
    project_root = os.path.dirname(os.path.dirname(os.path.dirname(path)))
    shared_lib = os.path.join(project_root, 'lib', 'isabelle')

    if os.path.exists(shared_lib):
        lib_dir = shared_lib
        skip_suffix = None
    else:
        # No shared library directory: fall back to the files next to the
        # theory, leaving out those ending in 'qtvc.thy'.
        lib_dir = os.path.dirname(path)
        skip_suffix = 'qtvc.thy'

    if lib_dir not in LIBS:
        LIBS[lib_dir] = '\n'.join(
            prompt_dep(thy_path)
            for thy_path in traverse_thy_files(lib_dir)
            if skip_suffix is None or not thy_path.endswith(skip_suffix)
        )
    return LIBS[lib_dir]

def prompt_thy(path):
    """Build the complete prompt asking for a proof of the theory at *path*.

    The prompt consists of the shared ``PRELUDE``, the library context from
    ``prompt_lib(path)``, the theory text inside an ``isabelle`` code fence,
    and a closing instruction, all joined by newlines.

    Args:
        path: Path of the theory file containing the proposition to prove.

    Returns:
        The prompt text as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the prompt does not depend on the locale
    # of the machine running the script.
    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()
    # Plain substring replacement: every occurrence of 'sorry' in the file,
    # wherever it appears, becomes the placeholder comment.
    content = content.replace('sorry', "(* fill in a proof here. *)")
    prompts = [
        PRELUDE,
        prompt_lib(path),
        "Prove the following Isabelle proposition:\n```isabelle\n",
        content,
        "```\nResponse the Isabelle proof only. Do not repeate any context nor the proposition.",
    ]
    return '\n'.join(prompts)


# Paths of the theory files to process, one per line of the list file.
with open('test_set.isabelle.lst', 'r', encoding='utf-8') as f:
    stripped_lines = [line.strip() for line in f]
# Drop blank lines (e.g. a trailing empty line) so they are not treated as an
# empty file path, which would make open() fail.
isabelle_files = [entry for entry in stripped_lines if entry]

# Write one JSON object per line: a batch request keyed by the theory path.
with open("batch_isabelle.jsonl", "w", encoding='utf-8') as f:
    total = len(isabelle_files)
    # Count from 1 so the progress line ends at [total/total].
    for i, file_path in enumerate(isabelle_files, start=1):
        print(f"[{i}/{total}] Processing {file_path}")
        prompt = prompt_thy(file_path)
        record = {
            "key": file_path,
            "request": {
                "contents": [{"parts": [{"text": prompt}]}],
                # NOTE(review): generationConfig is placed inside "request"
                # because it is a field of the generate-content request; as a
                # sibling of "request" it is not part of the request object.
                # Confirm against the batch API documentation.
                "generationConfig": {
                    'responseMimeType': 'application/json',
                    "responseSchema": {
                        "type": "OBJECT",
                        "properties": {"code": {"type": "STRING"}},
                        "required": ["code"],
                    },
                    # 'temperature' is intentionally not set; add
                    # 'temperature': 0 here to request greedy decoding.
                },
            },
        }
        f.write(json.dumps(record))
        f.write("\n")
