"""
Analyze the structure and size of an HDF5 file.

Prints the hierarchy of groups and datasets with each dataset's shape and
size, then summarizes the total number of datasets and groups and the total
data size.

Usage:
    python scripts/analyze_file_structure.py --replay_path <path_to_h5>
"""
import h5py
import argparse
import numpy as np

def _format_bytes(num_bytes):
    """Render a byte count as '<n> bytes (<m> MB)', with 1 MB = 1024**2 bytes."""
    return f"{num_bytes:,} bytes ({num_bytes/1024**2:.2f} MB)"


def analyze_h5_file(file_path):
    """Print the hierarchy and size statistics of an HDF5 file.

    Opens the file read-only, visits every group and dataset via
    ``visititems`` and prints one entry per object, indented by its depth in
    the hierarchy. Groups are listed with their attribute count; datasets
    with their shape and in-memory size (element count * dtype item size,
    i.e. not the compressed on-disk size). A summary with the dataset count,
    group count, total data size and file size on disk is printed at the end.

    Args:
        file_path: Path of the HDF5 file to analyze.

    Returns:
        None. All results are written to stdout. Any exception raised while
        opening or traversing the file is caught and reported as an
        "Error analyzing file: ..." line instead of being propagated.
    """
    print(f"Analyzing HDF5 file: {file_path}\n")

    try:
        with h5py.File(file_path, 'r') as h5f:
            # Running totals updated by the visitor callback below.
            total_size = 0
            dataset_count = 0
            group_count = 0

            def process_item(name, obj):
                """Visitor callback: print one object and update the totals.

                Always returns None; per the h5py documentation a non-None
                return value would stop the ``visititems`` traversal early.
                """
                nonlocal total_size, dataset_count, group_count

                # Names are paths relative to the root, so the number of
                # separators gives the nesting depth.
                indent = "  " * name.count('/')

                if isinstance(obj, h5py.Group):
                    group_count += 1
                    print(f"{indent}Group: {name}")
                    print(f"{indent}  Attributes: {len(obj.attrs)}")
                elif isinstance(obj, h5py.Dataset):
                    dataset_count += 1
                    # h5py reports size None for datasets with a null
                    # dataspace (h5py.Empty). Count them as 0 elements so a
                    # single empty dataset does not abort the whole analysis
                    # with a TypeError.
                    element_count = obj.size if obj.size is not None else 0
                    size_bytes = element_count * obj.dtype.itemsize
                    total_size += size_bytes

                    print(f"{indent}Dataset: {name}")
                    print(f"{indent}  Shape: {obj.shape}")
                    print(f"{indent}  Size: {_format_bytes(size_bytes)}")

            print("File Structure:")
            h5f.visititems(process_item)

            # Print summary statistics
            print("\n" + "="*50)
            print("SUMMARY STATISTICS:")
            print(f"Total Datasets: {dataset_count}")
            print(f"Total Groups: {group_count}")
            print(f"Total Data Size: {_format_bytes(total_size)}")
            print(f"File Size on Disk: {h5f.id.get_filesize():,} bytes")

    except Exception as e:
        # Deliberate best-effort boundary for a CLI tool: report the problem
        # and return rather than crash with a traceback.
        print(f"Error analyzing file: {str(e)}")

if __name__ == "__main__":
    # Command-line entry point: resolve the HDF5 path and run the analysis.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--replay_path",
        type=str,
        default='/infinite/common/r1_dataset/task2_1_1/collected_data_0_success.h5',  # h5 file to analyze
    )
    cli_options = parser.parse_args()
    analyze_h5_file(cli_options.replay_path)