#!/usr/bin/env python3
"""
Convert output.swebench.jsonl format to preds.json format

Usage:
    python convert_jsonl_to_preds.py input.jsonl output.json
"""

import json
import sys
from pathlib import Path


def convert_jsonl_to_preds(input_file, output_file):
    """
    Convert JSONL format to preds.json format

    Each non-blank line of the input is parsed as one JSON object. Valid
    records are collected into a dictionary keyed by ``instance_id`` and
    written to ``output_file`` as a single JSON document. Problems with an
    individual line are reported on stdout and that line is skipped, so one
    bad record does not abort the whole conversion.

    Args:
        input_file (str): Path to input JSONL file
        output_file (str): Path to output JSON file. Missing parent
            directories are created.

    Returns:
        bool: True if the output file was written, False if the input could
        not be read or the output could not be written.
    """
    required_fields = ('instance_id', 'model_patch', 'model_name_or_path')
    preds_dict = {}

    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue

                try:
                    data = json.loads(line)
                except json.JSONDecodeError as e:
                    print(f"Error parsing JSON on line {line_num}: {e}")
                    continue

                # A line can be valid JSON without being an object
                # (e.g. a list or a bare string); such a line has no fields.
                if not isinstance(data, dict):
                    print(f"Warning: Line {line_num} is not a JSON object, skipping")
                    continue

                # Validate required fields
                missing_fields = [field for field in required_fields if field not in data]
                if missing_fields:
                    print(f"Warning: Line {line_num} missing fields: {missing_fields}")

                # Without an id or a patch there is nothing usable to emit.
                if 'instance_id' in missing_fields or 'model_patch' in missing_fields:
                    print(f"Warning: Line {line_num} skipped")
                    continue

                instance_id = data['instance_id']
                if instance_id in preds_dict:
                    # The last occurrence wins, but make the overwrite visible.
                    print(f"Warning: Line {line_num} duplicates instance_id "
                          f"'{instance_id}', overwriting earlier entry")

                # Create the preds format; the placeholder model name is used
                # only when the record does not carry one.
                preds_dict[instance_id] = {
                    'model_name_or_path': data.get('model_name_or_path', "let it go"),
                    'instance_id': instance_id,
                    'model_patch': data['model_patch']
                }

    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found")
        return False
    except Exception as e:
        print(f"Error reading input file: {e}")
        return False

    # Write output
    try:
        # Make sure the destination directory exists before opening the file.
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(preds_dict, f, indent=4, ensure_ascii=False)

        print(f"Successfully converted {len(preds_dict)} instances")
        print(f"Output written to: {output_file}")
        return True

    except Exception as e:
        print(f"Error writing output file: {e}")
        return False


def main():
    """
    Command-line entry point.

    With two positional arguments, they are used as the input JSONL path and
    the output JSON path. With no arguments, the built-in default paths below
    are used. Any other argument count prints a usage message.

    Exits with status 1 on a usage error, when the input file does not
    exist, or when the conversion fails.
    """
    default_input = "/data/data_public/dtw_data/OpenHands/evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Lite-dev/CodeActAgent/deepseek-v3_maxiter_30_N_v0.42.0-no-hint-run_1/output.swebench.jsonl"
    default_output = "result/patch-evaluation/20250717/swe-smith-openhands-deepseek-v3.json"

    args = sys.argv[1:]
    if not args:
        input_file, output_file = default_input, default_output
    elif len(args) == 2:
        input_file, output_file = args
    else:
        print(f"Usage: python {sys.argv[0]} input.jsonl output.json")
        sys.exit(1)

    # Validate input file exists
    if not Path(input_file).exists():
        print(f"Error: Input file '{input_file}' does not exist")
        sys.exit(1)

    # Convert
    success = convert_jsonl_to_preds(input_file, output_file)

    if not success:
        sys.exit(1)


# Run the conversion only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()