#!/usr/bin/env python3
"""
Batch Mojibake Fixer
Re-processes CSV files with enhanced mojibake detection and correction.
"""

import os
import sys
import subprocess
import time
from pathlib import Path


def run_conversion(input_file, output_suffix="_clean"):
    """Run ``convert_encoding.py`` on a single file and report the outcome.

    The converter is launched as a child process and is looked up relative
    to the current working directory. Its output is written next to the
    input file as ``<input stem><output_suffix>.csv``.

    Args:
        input_file: Path (``str`` or path-like) of the CSV file to convert.
        output_suffix: Text appended to the input file's stem to build the
            output file name.

    Returns:
        ``(True, output_path)`` with ``output_path`` as a ``str`` when the
        converter exits with status 0, otherwise ``(False, None)``.
        Failures are printed rather than raised so a batch can continue.
    """
    input_path = Path(input_file)

    # Create output filename with new suffix
    output_file = input_path.parent / f"{input_path.stem}{output_suffix}.csv"

    print(f"\nProcessing: {input_file}")
    print(f"Output: {output_file}")

    cmd = [
        # Use the interpreter running this script so the converter sees the
        # same environment; "python3" may be missing or be a different
        # installation. sys.executable can be empty in embedded setups.
        sys.executable or "python3",
        "convert_encoding.py",
        str(input_file),
        "-o",
        str(output_file),
    ]

    # perf_counter is monotonic, so the duration is immune to clock changes.
    start_time = time.perf_counter()

    try:
        # errors="replace": the converter may echo undecodable bytes; that
        # must not turn a successful conversion into a reported failure.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            errors="replace",
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"✗ Error processing {input_file}:")
        print(f"  Exit code: {e.returncode}")
        print(f"  Error output: {e.stderr}")
        return False, None
    except Exception as e:
        # Deliberately broad: e.g. the interpreter could not be started.
        # Report it and let the caller move on to the next file.
        print(f"✗ Unexpected error: {e}")
        return False, None

    processing_time = time.perf_counter() - start_time

    print(f"✓ Completed in {processing_time:.2f} seconds")
    print(f"Output: {result.stdout}")

    return True, str(output_file)


def main(files=None):
    """Convert a batch of CSV files and print a summary.

    Each file that exists is passed to ``run_conversion``; files that do
    not exist are recorded as failures without being converted.

    Args:
        files: Optional iterable of input paths. A single ``str`` or
            path-like value is treated as a one-item batch. When ``None``
            (the default), the three built-in ``*_sent_emo.csv`` files are
            processed.

    Returns:
        ``0`` if every file was converted, ``1`` if at least one file was
        missing or failed. Intended to be used as the process exit code.
    """
    if files is None:
        # Files to process
        files_to_process = [
            "train_sent_emo.csv",
            "test_sent_emo.csv",
            "dev_sent_emo.csv",
        ]
    elif isinstance(files, (str, os.PathLike)):
        # Guard against iterating over the characters of a lone path.
        files_to_process = [files]
    else:
        # Materialize once: len() is needed and the input may be a generator.
        files_to_process = list(files)

    print("BATCH MOJIBAKE FIXING")
    print("=" * 50)
    print(
        f"Processing {len(files_to_process)} files with enhanced mojibake correction..."
    )

    successful_conversions = []
    failed_conversions = []

    # perf_counter is monotonic, so the duration is immune to clock changes.
    overall_start_time = time.perf_counter()

    for file_path in files_to_process:
        if not os.path.exists(file_path):
            print(f"\n✗ File not found: {file_path}")
            failed_conversions.append(file_path)
            continue

        success, output_file = run_conversion(file_path)

        if success:
            successful_conversions.append((file_path, output_file))
        else:
            failed_conversions.append(file_path)

    total_time = time.perf_counter() - overall_start_time

    # Summary
    print("\n" + "=" * 50)
    print("BATCH PROCESSING SUMMARY")
    print("=" * 50)
    print(f"Total processing time: {total_time:.2f} seconds")
    print(f"Successfully processed: {len(successful_conversions)} files")
    print(f"Failed: {len(failed_conversions)} files")

    if successful_conversions:
        print("\nSuccessful conversions:")
        for input_file, output_file in successful_conversions:
            print(f"  {input_file} → {output_file}")

    if failed_conversions:
        print("\nFailed conversions:")
        for file_path in failed_conversions:
            print(f"  {file_path}")

        # Non-zero exit status signals a partial or total failure.
        return 1

    return 0


# Script entry point: run the batch and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
