#!/usr/bin/env python
"""
Run DANCEST on multiple regions and generate reports for each.

This script runs the DANCEST workflow on multiple specified regions for
a given day, generating proper fusion files and reports for each region.
"""

import os
import sys
import time
import argparse
import subprocess
from pathlib import Path
from datetime import datetime
import glob
import json
import pandas as pd
import multiprocessing
import threading

# Add project root to Python path.
# The directory containing this file is appended to the end of sys.path, and
# only when it is not already listed, so it is never added twice.
project_root = Path(__file__).resolve().parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

def identify_available_regions():
    """Find available regions in the dataset.

    Reads the ``region`` column of the corrosion CSV (looked up relative to
    the current working directory) and returns its distinct values.

    Missing values are dropped and every remaining value is converted to
    ``str`` before sorting. Without this, a single empty cell makes
    ``sorted`` raise ``TypeError`` (str vs. float NaN) and the whole dataset
    is discarded in favour of the fallback list; numeric region ids would
    also break callers that join the names or pass them to a subprocess.

    Returns:
        list[str]: Sorted distinct region names from the dataset, or the
        predefined list ``s100`` .. ``s199`` when the file cannot be read
        or has no ``region`` column.
    """
    try:
        # Try loading from corrosion dataset
        df = pd.read_csv("[ANONYMIZED]_lp_corrosion.csv", low_memory=False)
        if 'region' in df.columns:
            # The set comprehension de-duplicates again after the str() conversion.
            regions = sorted({str(value) for value in df['region'].dropna().unique()})
            print(f"Found {len(regions)} regions in corrosion dataset")
            return regions
    except Exception as e:
        # Best-effort lookup: any read/parse problem falls through to the
        # predefined list below instead of aborting the run.
        print(f"Error loading corrosion dataset: {e}")

    # Fallback to predefined regions
    print("Using predefined list of regions")
    return [f"s{i}" for i in range(100, 200)]

def run_dancest_for_region(region, day, verbose=False):
    """Run DANCEST workflow for a specific region and day.

    Launches ``DANCEST_model/run_with_agents_verbose.py`` (path relative to
    the current working directory) as a child process with ``--region`` and
    ``--day`` arguments.

    Args:
        region: Region identifier; converted to ``str`` for the command line.
        day: Day to process; converted to ``str`` for the command line.
        verbose: When true the child inherits this process's stdout/stderr.
            Otherwise both streams are captured and stderr is printed only
            if the child fails.

    Returns:
        bool: ``True`` if the child exited with status 0, ``False`` if it
        exited non-zero or could not be started.
    """
    print(f"Running DANCEST for region {region}, day {day}")
    # Use the interpreter running this script rather than whatever "python"
    # resolves to on PATH (which may be missing or a different environment).
    interpreter = sys.executable or "python"
    command = [
        interpreter,
        "DANCEST_model/run_with_agents_verbose.py",
        "--region", str(region),
        "--day", str(day),
    ]

    try:
        # Capture output depending on verbose flag
        if verbose:
            result = subprocess.run(command)
        else:
            result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as e:
        # The child could not be launched at all; report it as a failed
        # region instead of aborting the whole multi-region run.
        print(f"❌ Failed to run DANCEST for region {region}")
        print(f"Error: {e}")
        return False

    # Check result
    if result.returncode == 0:
        print(f"✅ Successfully ran DANCEST for region {region}")
        return True

    print(f"❌ Failed to run DANCEST for region {region}")
    # stderr is None in verbose mode because it was not captured.
    if not verbose and result.stderr:
        print(f"Error: {result.stderr.decode('utf-8', errors='ignore')}")
    return False

def process_region(region, day, verbose=False):
    """Process a single region and generate report.

    Runs the DANCEST workflow for the region and, only if that succeeds,
    runs ``generate_failure_report.py`` (path relative to the current
    working directory) with the same ``--region`` / ``--day`` arguments.

    Args:
        region: Region identifier; converted to ``str`` for the command line.
        day: Day to process; converted to ``str`` for the command line.
        verbose: When true the report script's output is not captured.

    Returns:
        bool: ``True`` only when both the workflow and the report script
        succeed; ``False`` otherwise. Errors while launching the report
        script are printed and reported as ``False`` rather than raised.
    """
    if not run_dancest_for_region(region, day, verbose):
        return False

    # Generate failure report with the same interpreter that runs this
    # script, not whatever "python" happens to resolve to on PATH.
    cmd = [
        sys.executable or "python",
        "generate_failure_report.py",
        "--region", str(region),
        "--day", str(day),
    ]
    try:
        report_result = subprocess.run(cmd, capture_output=not verbose)
    except Exception as e:
        print(f"Error generating report for region {region}: {e}")
        return False

    if report_result.returncode == 0:
        print(f"✅ Generated report for region {region}")
        return True

    print(f"❌ Failed to generate report for region {region}")
    # Surface the captured stderr so the failure can be diagnosed; it is
    # None in verbose mode, where the child already wrote to the terminal.
    if report_result.stderr:
        print(f"Error: {report_result.stderr.decode('utf-8', errors='ignore')}")
    return False

def process_regions_sequentially(regions, day, verbose=False):
    """Process each region in turn, one after another.

    Args:
        regions: Iterable of region identifiers, handled in iteration order.
        day: Day passed through to ``process_region``.
        verbose: Verbosity flag passed through to ``process_region``.

    Returns:
        dict: Maps each region to ``{"success": <result of process_region>,
        "time_taken": <elapsed wall-clock seconds>}``.
    """
    outcome_by_region = {}
    for region in regions:
        began = time.time()
        succeeded = process_region(region, day, verbose)
        duration = time.time() - began
        # Record the outcome before reporting it on the console.
        outcome_by_region[region] = dict(success=succeeded, time_taken=duration)
        print(f"Completed region {region} in {duration:.2f} seconds (Success: {succeeded})")
    return outcome_by_region

def process_regions_in_parallel(regions, day, num_workers, verbose=False, timeout=600):
    """Process regions in parallel using multiple workers.

    Every region is submitted to a ``multiprocessing.Pool`` up front and the
    results are then collected in submission order.

    Args:
        regions: Iterable of region identifiers. It is materialised into a
            list because it is traversed twice (submit, then collect).
        day: Day passed through to ``process_region``.
        num_workers: Requested pool size. The pool is never larger than the
            number of regions and never smaller than one process.
        verbose: Verbosity flag passed through to ``process_region``.
        timeout: Seconds to wait for each result when it is collected
            (default 600, i.e. 10 minutes). ``None`` waits indefinitely.

    Returns:
        dict: Maps each region to ``{"success": bool, "time_taken": 0}``;
        elapsed time is not tracked in parallel mode. Regions whose worker
        raised or timed out have ``success`` False and an ``"error"``
        message. An empty ``regions`` yields ``{}`` without creating a pool.
    """
    regions = list(regions)
    if not regions:
        return {}

    worker_count = max(1, min(num_workers, len(regions)))
    with multiprocessing.Pool(processes=worker_count) as pool:
        pending = [
            pool.apply_async(process_region, args=(region, day, verbose))
            for region in regions
        ]

        # Wait for all processes to complete
        processed_results = {}
        for region, async_result in zip(regions, pending):
            try:
                success = async_result.get(timeout=timeout)
            except multiprocessing.TimeoutError:
                # str() of this exception is empty, so spell the reason out.
                message = f"Timed out after {timeout} seconds"
            except Exception as e:
                message = str(e) or type(e).__name__
            else:
                processed_results[region] = {
                    "success": success,
                    "time_taken": 0,  # We don't track time in parallel mode
                }
                print(f"Completed region {region} (Success: {success})")
                continue

            print(f"Error processing region {region}: {message}")
            processed_results[region] = {
                "success": False,
                "time_taken": 0,
                "error": message,
            }

        return processed_results

def save_results_summary(results, day):
    """Save a summary of processing results.

    Writes two files into the current working directory, both named
    ``dancest_multi_region_summary_day<day>_<YYYYmmdd_HHMMSS>``: a ``.json``
    dump of ``results`` and a human-readable ``.txt`` report.

    Args:
        results: Mapping of region -> result dict. The keys ``"success"``,
            ``"time_taken"`` and ``"error"`` are read when present.
        day: Day that was processed; used in the file names and the report.

    Returns:
        str: File name of the text summary.
    """
    now = datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    base_name = f"dancest_multi_region_summary_day{day}_{timestamp}"

    with open(f"{base_name}.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    # Create a more readable text summary
    success_count = sum(1 for r in results.values() if r.get("success", False))
    total_count = len(results)
    # An empty result set would otherwise divide by zero.
    success_rate = 100 * success_count / total_count if total_count else 0.0

    summary_text = [
        "DANCEST Multi-Region Processing Summary",
        "----------------------------------------",
        f"Date: {now.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Day: {day}",
        f"Total regions: {total_count}",
        f"Successful regions: {success_count}",
        f"Success rate: {success_rate:.1f}%",
        "",
        "Results by region:",
        "------------------",
    ]

    for region, result in results.items():
        succeeded = result.get("success", False)
        time_taken = result.get("time_taken", 0)
        # Only show a duration when one was actually recorded.
        time_info = f" in {time_taken:.2f}s" if time_taken > 0 else ""
        status = "✅ Success" if succeeded else "❌ Failed"
        error = f" - Error: {result['error']}" if not succeeded and "error" in result else ""
        summary_text.append(f"{region}: {status}{time_info}{error}")

    summary_file = f"{base_name}.txt"
    # Explicit UTF-8: the status markers are not encodable in every
    # platform default encoding (e.g. cp1252).
    with open(summary_file, "w", encoding="utf-8") as f:
        f.write("\n".join(summary_text))

    print(f"Summary saved to {summary_file}")
    return summary_file

def main():
    """Run main workflow.

    Parses the command line, chooses the regions to process (explicit
    ``--regions`` or the first ``--count`` available ones), asks for
    confirmation on stdin, processes the regions sequentially or in
    parallel, then writes and prints a summary.

    Returns early without processing when no region is selected, when the
    user does not confirm, or when stdin is closed at the prompt.
    """
    parser = argparse.ArgumentParser(description="Run DANCEST on multiple regions")
    parser.add_argument("--day", type=int, default=210, help="Day to process")
    parser.add_argument("--regions", nargs="+", help="Specific regions to process (e.g. s126 s127 s128)")
    parser.add_argument("--count", type=int, default=5, help="Number of regions to process if specific regions not provided")
    parser.add_argument("--parallel", action="store_true", help="Run in parallel mode")
    parser.add_argument("--workers", type=int, default=4, help="Number of worker processes if running in parallel")
    parser.add_argument("--verbose", action="store_true", help="Show verbose output")

    args = parser.parse_args()

    if args.parallel and args.workers < 1:
        parser.error("--workers must be at least 1")

    # Determine regions to process
    if args.regions:
        regions_to_process = args.regions
    else:
        available_regions = identify_available_regions()
        # A negative count would slice from the end; treat it as zero.
        regions_to_process = available_regions[:max(args.count, 0)]

    # Region names are joined for display and passed on subprocess command
    # lines, both of which require strings.
    regions_to_process = [str(region) for region in regions_to_process]

    if not regions_to_process:
        print("No regions to process")
        return

    print(f"Will process {len(regions_to_process)} regions for day {args.day}")
    print(f"Regions: {', '.join(regions_to_process)}")

    # Confirm with user
    try:
        confirm = input("Proceed? (y/n): ").strip().lower()
    except EOFError:
        # stdin is closed (e.g. run from a scheduler): treat as "no".
        confirm = ""
    if confirm not in ("y", "yes"):
        print("Aborted by user")
        return

    start_time = time.time()

    # Process regions (sequential or parallel)
    if args.parallel:
        print(f"Processing in parallel with {args.workers} workers...")
        results = process_regions_in_parallel(regions_to_process, args.day, args.workers, args.verbose)
    else:
        print("Processing sequentially...")
        results = process_regions_sequentially(regions_to_process, args.day, args.verbose)

    # Save summary of results
    summary_file = save_results_summary(results, args.day)

    # Print final summary
    total_time = time.time() - start_time
    success_count = sum(1 for r in results.values() if r.get("success", False))
    print(f"\nProcessing completed in {total_time:.2f} seconds")
    print(f"Processed {len(regions_to_process)} regions with {success_count} successes")
    print(f"Summary saved to {summary_file}")

# Run the workflow only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()