import argparse
import json
import re
from pathlib import Path
from typing import Any, Dict, Optional

def extract_run_index(run_name: str) -> Optional[str]:
    """Return the trailing run number of *run_name* as a string of ASCII digits.

    Only a ``run<digits>`` suffix at the very end of the name is recognised,
    e.g. ``'gpt5_run0'`` -> ``'0'``. Leading zeros are kept as written.

    Args:
        run_name: The run identifier to inspect.

    Returns:
        The digit string, or ``None`` when *run_name* is not a string or has
        no such suffix. The caller is then expected to choose a fallback key.
    """
    if not isinstance(run_name, str):
        return None
    # Use [0-9] rather than \d: in a str pattern \d also matches non-ASCII
    # Unicode digits, which would produce keys that are not plain "0"-"9".
    match = re.search(r"run([0-9]+)$", run_name)
    return match.group(1) if match else None

def _read_code_file(op: str, code_path: Any) -> Optional[str]:
    """Return the text stored at *code_path*, or None if it is unset or unreadable."""
    if not code_path:
        return None
    try:
        return Path(code_path).read_text(encoding="utf-8", errors="replace")
    except (OSError, TypeError, ValueError) as e:
        # Best effort: a missing or invalid path must not abort the whole merge.
        print(f"[WARN] Failed to read code for {op} at {code_path}: {e}")
        return None


def _assign_run_keys(op: str, runs: list) -> Dict[int, str]:
    """Map each usable list position in *runs* to a unique numeric-string key."""
    keys: Dict[int, str] = {}
    taken = set()
    pending = []

    # Pass 1: indices parsed from run_name win, first occurrence first.
    for pos, run in enumerate(runs):
        if not isinstance(run, dict):
            print(f"[WARN] Skipping non-object result for {op} at position {pos}")
            continue
        run_name = run.get("run_name")
        parsed = extract_run_index(run_name) if isinstance(run_name, str) else None
        if parsed is not None and parsed not in taken:
            keys[pos] = parsed
            taken.add(parsed)
        else:
            pending.append((pos, parsed))

    # Pass 2: remaining runs fall back to their list position, moving up to the
    # next free number so an earlier entry is never overwritten.
    for pos, parsed in pending:
        candidate = pos
        while str(candidate) in taken:
            candidate += 1
        key = str(candidate)
        if parsed is not None:
            print(
                f"[WARN] Duplicate run index {parsed} for {op}; "
                f"storing run at position {pos} under {key}"
            )
        keys[pos] = key
        taken.add(key)

    return keys


def merge_results_with_code(input_json: Path, output_json: Path):
    """Merge raw results with their source code and write the combined JSON.

    ``input_json`` must hold a JSON object mapping each op name to either a
    single run result (object) or a list of run results. Every run becomes one
    entry under ``output[op][run_key]``, where ``run_key`` is the number parsed
    from the run's ``run_name`` or, failing that, its position in the list.
    Keys are kept unique within an op, so runs never overwrite each other.

    The ``code`` field is taken from the file at ``code_path`` when readable,
    otherwise from the result's own ``code`` value, otherwise the placeholder
    ``"MISSING_CODE"`` so that the field is always a non-empty value.

    Args:
        input_json: Path of the raw results file to read.
        output_json: Path of the merged file to write (overwritten).

    Raises:
        FileNotFoundError: If ``input_json`` does not exist.
        ValueError: If the top-level JSON value is not an object.
    """
    if not input_json.exists():
        raise FileNotFoundError(f"{input_json} does not exist")

    # Explicit UTF-8 so the result does not depend on the platform locale.
    with input_json.open("r", encoding="utf-8") as f:
        raw_data = json.load(f)

    if not isinstance(raw_data, dict):
        raise ValueError(
            f"{input_json} must contain a JSON object mapping op names to results"
        )

    final_schema: Dict[str, Dict[str, Any]] = {}

    for op, results_list in raw_data.items():
        if not isinstance(results_list, list):
            # Handle single-run case (dict)
            results_list = [results_list]

        run_keys = _assign_run_keys(op, results_list)

        op_entry: Dict[str, Any] = {}
        # Iterate the original list so entries keep their input order.
        for pos, res in enumerate(results_list):
            if pos not in run_keys:
                continue  # non-object entry, already reported

            code_content = _read_code_file(op, res.get("code_path"))

            op_entry[run_keys[pos]] = {
                "compiled": res.get("compiled", False),
                "correctness": res.get("correctness"),
                "correctness_info": res.get("correctness_info"),
                "performance": res.get("performance"),
                "profiling": res.get("profiling"),
                "hardware": res.get("hardware"),
                "compile_info": res.get("compile_info"),
                # File content first, then inline code, then a placeholder.
                "code": code_content or res.get("code") or "MISSING_CODE",
            }

        final_schema[op] = op_entry

    with output_json.open("w", encoding="utf-8") as f:
        json.dump(final_schema, f, indent=4)
    print(f"[INFO] merged results written to {output_json}")

def main():
    """Command-line entry point: parse the two path arguments and run the merge."""
    cli = argparse.ArgumentParser(
        description="Merge op_eval results with source code into aggregation schema."
    )
    # Both positionals are converted to Path objects by argparse.
    positional_args = (
        ("input_json", "Path to raw op_eval results.json"),
        ("output_json", "Path to write the combined output"),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, type=Path, help=arg_help)

    parsed = cli.parse_args()
    merge_results_with_code(parsed.input_json, parsed.output_json)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
