#!/usr/bin/env python3
import os
import anthropic

# Module-level Anthropic API client, shared by every clean() call below.
# No credentials are passed explicitly; presumably the SDK reads the API key
# from the environment (ANTHROPIC_API_KEY) — TODO confirm for your deployment.
client = anthropic.Anthropic()

def norm(text):
    """Drop a surrounding Markdown code fence and end the result with a newline.

    The text is considered fenced when, after stripping surrounding
    whitespace, it has at least two lines and both the first and the last
    line start with three backticks. In that case only the lines between the
    two fence lines are returned. Otherwise the input is returned as given
    (not stripped). In both cases a single newline is appended.
    """
    fence = '```'
    rows = text.strip().split('\n')
    is_fenced = (
        len(rows) >= 2
        and rows[0].startswith(fence)
        and rows[-1].startswith(fence)
    )
    if not is_fenced:
        # Unfenced input is passed through untouched, apart from the newline.
        return text + '\n'
    inner = rows[1:-1]
    return '\n'.join(inner) + '\n'

def clean(path):
    """Ask the model to remove lemma declarations and assertions from a Why3 file.

    The file at ``path`` is read as UTF-8, embedded at the end of a prompt and
    sent as a single user message through the module-level ``client``. The
    streamed text of the reply is concatenated, stripped and passed through
    ``norm``.

    Args:
        path: Path of the Why3 source file to process.

    Returns:
        The normalised reply, or ``""`` if any exception is raised while
        calling the API or consuming the stream (the error is printed).

    Raises:
        OSError, UnicodeDecodeError: If the input file cannot be read; reading
            happens outside the ``try`` block and is not caught here.
    """
    with open(path, 'r', encoding='utf-8') as file:
        text = file.read()

    # The prompt is assembled from separately named pieces (role statement,
    # task, requirements, syntax patterns) for readability. Note that all of
    # them, including `system_prompt`, end up in one user message below.
    system_prompt = """You are an expert in formal verification, especially in the verifier Why3."""

    task_description = """You need to remove all the lemma declarations and assertion annotations. Then you should return the code without the lemma declarations and assertion annotations from it."""

    # The Why3 disjunction operator is written with an escaped backslash
    # (`\\/`): a bare backslash-slash is an invalid escape sequence in a
    # non-raw string literal. The resulting runtime text is unchanged.
    syntax_patterns = """
    Syntax Patterns to Remove:
    
    1. Lemma declarations:
       - Simple lemmas: "lemma <name>: <formula>"
       - Let lemmas: "let lemma <name> (<params>) : <type>"
       - Recursive lemmas: "let rec lemma <name> (<params>) : <type>"
       Examples:
         lemma vertices_cardinal_pos: cardinal vertices > 0
         let lemma simple_path (v: vertex) (l: list vertex)
         let rec lemma simple_path (v: vertex) (l: list vertex)
    
    2. Assertion annotations:
       - Basic assertions: "assert { <formula> }"
       - Assertions with reasons: "assert { <formula> by <reason> }"
       - Multi-line assertions with complex formulas
       Examples:
         assert { s = 22 }
         assert { exchange (old a) a i j }
         assert { !j < !i \\/ !j = !i = m \\/ !j = !i = n }
         assert { forall i. 0 <= i < i0 -> ... }
    """

    requirements = """
    Requirements:
    1. The aim is to test Why3's automation capability without any lemma hints or assertion annotations.
    2. You must ensure that the code is still syntactic correct and any program inside it is semantically unchanged after removing the lemma declarations and assertion annotations.
    3. Some non-lemma block may have ensures/requires clauses. You must preserve these ensures/requires clauses.
    4. Do NOT remove the ensures/requires clauses of any clauses that are not lemmas.
    5. You must return the obtained code only, without any other text.
    6. You definitely cannot output any text other than the code.
    """

    # Combine everything into the full prompt; the file contents go last.
    full_prompt = f"{system_prompt}\n\n{task_description}\n{requirements}\n\n{syntax_patterns}\n\nWhy3 code to process:\n\n{text}"

    try:
        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=40000,
            temperature=0,
            stream=True,
            messages=[
                {
                    "role": "user",
                    "content": full_prompt
                }
            ]
        )

        # Collect the streamed text fragments and join them once at the end.
        pieces = []
        for chunk in response:
            if chunk.type != "content_block_delta":
                continue
            # Not every delta is guaranteed to expose `.text`; skipping those
            # avoids an AttributeError that would otherwise discard the whole
            # reply via the broad handler below.
            piece = getattr(chunk.delta, "text", None)
            if piece:
                pieces.append(piece)

        reply = "".join(pieces)
        return norm(reply.strip())
    except Exception as e:
        # Best-effort by design: report the failure and signal it to the
        # caller with an empty string instead of aborting the whole batch.
        print(f"Error: {e}")
        return ""

# Input tree that is scanned for Why3 sources, and the tree that mirrors it
# with the processed results.
source_root = './data/why3/common'
output_root = './data/why3/no-lemma3'

# Collect all .mlw file paths first
file_paths = [
    os.path.join(root, name)
    for root, _dirs, names in os.walk(source_root)
    for name in names
    if name.endswith('.mlw')
]

print(f"Found {len(file_paths)} .mlw files to process")

# Process each file. The counter is 1-based so the last file reports N/N.
total = len(file_paths)
for i, file_path in enumerate(file_paths, start=1):
    print(f"[{i}/{total}] Processing: {file_path}")
    rel_path = os.path.relpath(file_path, source_root)
    output_path = os.path.join(output_root, rel_path)
    # An existing output means the file was handled by an earlier run.
    if os.path.exists(output_path):
        print(f"[{i}/{total}] Skipping (already exists): {output_path}")
        continue

    obtained = clean(file_path)
    if not obtained:
        # clean() returns "" only when the request failed (a successful reply
        # always ends with a newline). Writing an empty file here would make
        # the existence check above skip this input on every later run, so
        # leave no output and let a re-run retry it.
        print(f"[{i}/{total}] Failed, no output written: {output_path}")
        continue

    print(obtained)
    print('\n\n\n')

    # Create output directory if it doesn't exist
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(obtained)

    print(f"[{i}/{total}] Saved: {output_path}")

print("Processing complete!")


