#!/usr/bin/env python3
"""
Script to merge dev and test folder JSON files into a hard/ folder with renamed task_ids.
Combines corresponding JSON files from dev/ and test/ directories and renames task_ids to lcbhard_0, lcbhard_1, etc.
"""

import json
import os
from pathlib import Path


def load_json_file(filepath: Path) -> list:
    """Read *filepath* as UTF-8 text and return its parsed JSON content.

    The return annotation reflects how callers in this file use the result
    (a list of task dictionaries); the function itself returns whatever
    top-level value the JSON document contains.
    """
    with open(filepath, encoding='utf-8') as source:
        raw_text = source.read()
    return json.loads(raw_text)


def save_json_file(data: list, filepath: Path) -> None:
    """Serialize *data* as JSON and write it to *filepath* (UTF-8).

    The output is indented by two spaces, and non-ASCII characters are
    written as-is rather than as ``\\uXXXX`` escapes. An existing file at
    *filepath* is overwritten.
    """
    with open(filepath, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, ensure_ascii=False, indent=2)


def rename_task_ids(data: list, start_id: int) -> tuple[list, int]:
    """
    Give every task a sequential ``lcbhard_<n>`` task_id.

    The task dictionaries are modified in place; the list that is returned
    is the same object that was passed in.

    Args:
        data: List of task dictionaries
        start_id: Number used for the first task; later tasks count up by one

    Returns:
        Tuple of (data, first ID number not used by this call)
    """
    for number, entry in enumerate(data, start=start_id):
        entry['task_id'] = f'lcbhard_{number}'

    return data, start_id + len(data)


def merge_json_files(dev_file: Path, test_file: Path, output_file: Path) -> int:
    """
    Combine one dev/test pair of JSON files into a single output file.

    Both inputs are loaded, their tasks are renumbered as one sequence
    (dev tasks first, starting at lcbhard_0, then test tasks), and the
    concatenated list is written to ``output_file``. Progress is printed
    to stdout.

    Args:
        dev_file: Path to dev JSON file
        test_file: Path to test JSON file
        output_file: Path to output merged JSON file

    Returns:
        Number of tasks in merged file
    """
    print(f"Merging {dev_file.name} and {test_file.name} -> {output_file.name}")

    # Read both inputs before touching any IDs.
    dev_tasks = load_json_file(dev_file)
    test_tasks = load_json_file(test_file)

    # Dev tasks take IDs 0..k-1; test tasks continue from k.
    dev_tasks, first_test_id = rename_task_ids(dev_tasks, 0)
    test_tasks, _ = rename_task_ids(test_tasks, first_test_id)

    combined = dev_tasks + test_tasks
    save_json_file(combined, output_file)

    merged_count = len(combined)
    print(f"  Dev tasks: {len(dev_tasks)}, Test tasks: {len(test_tasks)}, Total: {merged_count}")
    return merged_count


def main():
    """Merge every dev/test JSON file pair into the hard/ directory.

    For each file name in a fixed list, the dev/ and test/ versions (located
    next to this script) are loaded, their task_ids are renamed to
    lcbhard_0, lcbhard_1, ... and the concatenated list is written to
    hard/<name>. Numbering restarts at 0 for every output file. A pair with
    a missing input is reported and skipped. A summary is printed at the end.
    """
    base_dir = Path(__file__).parent
    dev_dir = base_dir / 'dev'
    test_dir = base_dir / 'test'
    hard_dir = base_dir / 'hard'

    # Create hard directory if it doesn't exist
    hard_dir.mkdir(exist_ok=True)

    # Get list of JSON files to merge
    json_files = ['original.json', 'conflicting.json', 'inverted.json', 'oneoff.json']

    total_tasks = 0
    # Largest number of tasks written to any single output file. Because
    # numbering restarts per file, this determines the highest ID assigned
    # anywhere, independent of which file happens to be processed last.
    max_tasks_per_file = 0

    print("Merging dev and test JSON files into hard/ directory...")
    print("=" * 60)

    for json_file in json_files:
        dev_file = dev_dir / json_file
        test_file = test_dir / json_file
        output_file = hard_dir / json_file

        if dev_file.exists() and test_file.exists():
            # Load and merge data
            dev_data = load_json_file(dev_file)
            test_data = load_json_file(test_file)

            # Rename task_ids; the counter starts at 0 for each file and
            # runs through the dev tasks first, then the test tasks.
            dev_data, next_id = rename_task_ids(dev_data, 0)
            test_data, next_id = rename_task_ids(test_data, next_id)

            # Combine and save
            merged_data = dev_data + test_data
            save_json_file(merged_data, output_file)

            num_tasks = len(merged_data)
            total_tasks += num_tasks
            max_tasks_per_file = max(max_tasks_per_file, num_tasks)

            print(f"{json_file}:")
            print(f"  Dev: {len(dev_data)} tasks")
            print(f"  Test: {len(test_data)} tasks")
            print(f"  Total: {num_tasks} tasks")
            print()
        else:
            print(f"Warning: Missing files for {json_file}")
            if not dev_file.exists():
                print(f"  Missing: {dev_file}")
            if not test_file.exists():
                print(f"  Missing: {test_file}")
            print()

    print("=" * 60)
    print(f"Merge completed! Total tasks across all files: {total_tasks}")
    if max_tasks_per_file:
        print(f"Task IDs range from lcbhard_0 to lcbhard_{max_tasks_per_file - 1}")
    else:
        # Nothing was merged (all pairs missing or empty); avoid reporting
        # a nonsensical upper bound such as lcbhard_-1.
        print("No task IDs were assigned.")
    print(f"Output directory: {hard_dir}")


if __name__ == '__main__':
    main()