import json
import argparse
import os
import sys

# Add this file's directory to sys.path so that `utils` (which provides save_metrics) can be imported from it
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from utils import save_metrics

def merge_metrics_files(from_file, to_file):
    """
    Merge metrics from 'from_file' into 'to_file'.

    Both files should contain JSON dictionaries with metrics. The source file
    is loaded and validated here; the actual merge and write are delegated to
    save_metrics.

    Args:
        from_file: Path of the JSON file to read metrics from.
        to_file: Path of the JSON file the metrics are merged into.

    Returns:
        True if the metrics were handed to save_metrics without error.
        False (after printing an error) if the source file is missing,
        cannot be read or parsed, does not contain a JSON object, or the
        destination could not be written.
    """
    if not os.path.exists(from_file):
        print(f"Error: Source file {from_file} does not exist.")
        return False

    try:
        with open(from_file, "r", encoding="utf-8") as f:
            from_metrics = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading from file {from_file}: {e}")
        return False

    # The contract is "a JSON dictionary"; reject lists/scalars up front
    # instead of passing them on to save_metrics.
    if not isinstance(from_metrics, dict):
        print(
            f"Error: Source file {from_file} does not contain a JSON object "
            f"(got {type(from_metrics).__name__})."
        )
        return False

    try:
        # Make sure the destination directory exists (no-op when it already
        # does); dirname is empty for a bare filename in the current directory.
        to_dir = os.path.dirname(to_file)
        if to_dir:
            os.makedirs(to_dir, exist_ok=True)

        # NOTE(review): save_metrics is expected to read any existing
        # 'to_file' and merge the new metrics into it -- confirm in utils.
        save_metrics(to_file, from_metrics)
    except (OSError, ValueError) as e:
        # Report write/merge failures the same way as read failures so one
        # bad destination does not abort a whole batch.
        print(f"Error merging into file {to_file}: {e}")
        return False

    print(f"Successfully merged metrics from {from_file} into {to_file}")
    print(f"Merged {len(from_metrics)} metrics from source file")

    return True

def merge_metrics_folders(from_folder, to_folder, model_name):
    """
    Merge metrics from from_folder to to_folder for a specific model.

    The model's sub-folder name is model_name with every "/" replaced by "_".
    Each dataset directory found under the source model folder is visited and
    its train_metrics.json and val_metrics.json files are merged into the
    same relative location under to_folder.

    Args:
        from_folder: Root folder containing the source model folders.
        to_folder: Root folder that receives the merged metrics.
        model_name: Model name, e.g. 'EleutherAI/pythia-410m-deduped'.

    Returns:
        True if at least one metrics file was merged successfully; False if
        the source model folder is missing or not a directory, contains no
        dataset folders, or no file could be merged. Note that individual
        failures do not turn the result False as long as one merge succeeded.
    """
    # Convert model name: replace / with _
    model_folder_name = model_name.replace("/", "_")

    from_model_path = os.path.join(from_folder, model_folder_name)
    to_model_path = os.path.join(to_folder, model_folder_name)

    # isdir rather than exists: a regular file at this path would otherwise
    # make os.listdir below raise NotADirectoryError.
    if not os.path.isdir(from_model_path):
        print(
            f"Error: Source model folder {from_model_path} does not exist "
            f"or is not a directory."
        )
        return False

    print(f"Processing model: {model_name} ({model_folder_name})")
    print(f"From: {from_model_path}")
    print(f"To: {to_model_path}")

    # Sort so processing order (and log output) is deterministic;
    # os.listdir returns entries in arbitrary order.
    dataset_folders = sorted(
        d for d in os.listdir(from_model_path)
        if os.path.isdir(os.path.join(from_model_path, d))
    )

    if not dataset_folders:
        print(f"No dataset folders found in {from_model_path}")
        return False

    success_count = 0
    # Counts every expected metrics file, including ones missing in the source.
    total_files = 0

    for dataset_name in dataset_folders:
        print(f"\nProcessing dataset: {dataset_name}")

        from_dataset_path = os.path.join(from_model_path, dataset_name)
        to_dataset_path = os.path.join(to_model_path, dataset_name)

        for metrics_file in ("train_metrics.json", "val_metrics.json"):
            from_file = os.path.join(from_dataset_path, metrics_file)
            to_file = os.path.join(to_dataset_path, metrics_file)

            total_files += 1

            if not os.path.exists(from_file):
                print(f"  Warning: {from_file} not found, skipping...")
                continue

            print(f"  Merging {metrics_file}...")
            if merge_metrics_files(from_file, to_file):
                success_count += 1
            else:
                print(f"  Failed to merge {metrics_file}")

    print(f"\nSummary: Successfully merged {success_count}/{total_files} files")
    return success_count > 0

def main():
    """
    Command-line entry point.

    Parses three positional arguments (from_folder, to_folder, model_name),
    runs merge_metrics_folders with them, and exits with status 1 when the
    merge reports failure.
    """
    cli = argparse.ArgumentParser(description="Merge metrics folders for a specific model")

    positional_args = (
        ("from_folder", "Source folder containing model metrics"),
        ("to_folder", "Destination folder for merged metrics"),
        ("model_name", "Model name (e.g., 'EleutherAI/pythia-410m-deduped')"),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help)

    opts = cli.parse_args()

    merged_ok = merge_metrics_folders(opts.from_folder, opts.to_folder, opts.model_name)
    if merged_ok:
        return
    sys.exit(1)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
