#!/usr/bin/env python3
"""
Manual Hierarchical Tree Generator for Schema Induction Pipeline

This script allows you to generate the hierarchical tree for any iteration
without rerunning the entire pipeline. It's useful when you want to analyze
intermediate iterations or when the pipeline only saves hierarchical trees
for the last iteration.

Usage:
    python generate_hierarchical_tree.py <iteration_number>
    
Example:
    python generate_hierarchical_tree.py 1
    python generate_hierarchical_tree.py 2
"""

import os
import sys
import json
import pandas as pd
from datetime import datetime

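# Make the local 'utils' directory importable. The path is relative, so the
# script must be run from the directory that contains 'utils/'.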
sys.path.append('utils')
from hierarchical_tree_creator import HierarchicalTreeCreator

def generate_hierarchical_tree(iteration_number: int) -> bool:
    """Generate the hierarchical tree for a specific pipeline iteration.

    The iteration's working directory is ``temp_files/iteration_<NN>``
    (zero-padded to two digits, relative to the current working directory).
    Before building the tree, the function verifies that the directory and
    the four input artifacts it lists below are present, printing a status
    line for each one.

    Args:
        iteration_number: Number of the iteration whose tree should be built.

    Returns:
        True when ``HierarchicalTreeCreator.create_hierarchical_tree`` ran and
        the summary was printed; False when the iteration directory or any
        required file is missing, or when any exception was raised while
        creating the tree or reporting on it (the traceback is printed).
    """

    print(f'=== GENERATING HIERARCHICAL TREE FOR ITERATION {iteration_number} ===')
    print()

    # Check that the iteration directory exists. isdir (rather than exists)
    # so that a stray regular file with the same name is not accepted.
    iteration_dir = f'temp_files/iteration_{iteration_number:02d}'
    if not os.path.isdir(iteration_dir):
        print(f'❌ Iteration directory not found: {iteration_dir}')
        return False

    print(f'✅ Found iteration directory: {iteration_dir}')

    # Input artifacts that must be present before the tree is built.
    required_files = [
        os.path.join(iteration_dir, 'topologically_sorted_graph', 'topological_sort.parquet'),
        os.path.join(iteration_dir, 'high_level_codes', 'high_level_codes.parquet'),
        os.path.join(iteration_dir, 'topologically_sorted_graph', 'code_datapoints_enhanced.parquet'),
        os.path.join(iteration_dir, 'topologically_sorted_graph', 'mapping_report.json'),
    ]

    print('🔍 CHECKING REQUIRED FILES:')
    missing_files = []
    for file_path in required_files:
        # os.path.exists is kept on purpose: a parquet path may be a directory.
        if os.path.exists(file_path):
            print(f'✅ {os.path.basename(file_path)}')
        else:
            print(f'❌ {os.path.basename(file_path)} - NOT FOUND')
            missing_files.append(file_path)

    if missing_files:
        print()
        print('❌ Cannot generate hierarchical tree - missing required files:')
        for file_path in missing_files:
            print(f'   - {file_path}')
        return False

    print()
    print('🌳 CREATING HIERARCHICAL TREE...')

    try:
        # Create hierarchical tree
        creator = HierarchicalTreeCreator(iteration_dir)
        result = creator.create_hierarchical_tree()

        # The result is read as a mapping with a 'hierarchical_tree' entry
        # (holding 'metadata') and a 'saved_files' entry.
        metadata = result["hierarchical_tree"]["metadata"]

        print()
        print('✅ HIERARCHICAL TREE GENERATION COMPLETE!')
        print(f'📁 Directory: {iteration_dir}/hierarchical_tree')
        print(f'📊 Total nodes: {metadata["total_nodes"]}')
        print(f'📊 Levels: {metadata["levels"]}')
        print(f'💾 Saved files: {result["saved_files"]}')

        return True

    except Exception as e:
        # Top-level boundary of a CLI tool: report the failure with a full
        # traceback and signal it through the return value.
        print(f'❌ Error generating hierarchical tree: {e}')
        import traceback
        traceback.print_exc()
        return False

def main():
    """Command-line entry point.

    Expects exactly one argument, the iteration number, which must be an
    integer >= 1. Prints a usage or error message and exits with status 1 on
    invalid input; otherwise exits with 0 if the tree was generated and 1 if
    generation failed.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python generate_hierarchical_tree.py <iteration_number>")
        print("Example: python generate_hierarchical_tree.py 1")
        sys.exit(1)

    raw_iteration = cli_args[0]
    try:
        iteration_number = int(raw_iteration)
    except ValueError:
        print("❌ Iteration number must be an integer")
        sys.exit(1)

    # Range check happens only after a successful integer conversion.
    if iteration_number < 1:
        print("❌ Iteration number must be >= 1")
        sys.exit(1)

    if generate_hierarchical_tree(iteration_number):
        exit_status = 0
    else:
        exit_status = 1
    sys.exit(exit_status)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
