import os
import pandas as pd
import numpy as np
import glob

def check_dataset_format(data_root, *, max_files=21):
    """Survey the column layout of the ``*.txt`` files under *data_root*.

    Walks *data_root* recursively, reads a sample of the comma-separated
    ``.txt`` files with pandas (no header row), and prints:

    * how many sampled files have each column count,
    * the first ten values of one sample row per column count,
    * a recommended configuration derived from the most common column
      count (``NUM_JOINTS`` is that count floor-divided by 3).

    Args:
        data_root: Directory that is searched recursively for ``*.txt`` files.
        max_files: Maximum number of files to read. The default of 21 matches
            the historical behaviour of this script; pass ``None`` to read
            every file found.

    Returns:
        None. All results are printed to standard output.
    """
    print(f"Checking dataset: {data_root}")

    all_files = []
    for subdir, _, _ in os.walk(data_root):
        all_files.extend(glob.glob(os.path.join(subdir, "*.txt")))
    # os.walk order depends on the filesystem; sort so that the sampled
    # subset (and therefore the report) is reproducible between runs.
    all_files.sort()

    print(f"Found {len(all_files)} files")

    column_counts = {}
    sample_rows = {}

    for file_path in all_files[:max_files]:
        try:
            df = pd.read_csv(file_path, header=None, sep=',')
        except Exception as e:
            # Deliberately broad: this is a best-effort survey, so one
            # unreadable or malformed file must not abort the whole run.
            print(f"Error reading file {file_path}: {e}")
            continue

        if df.empty:
            print(f"Error reading file {file_path}: no data rows")
            continue

        col_count = df.shape[1]
        column_counts[col_count] = column_counts.get(col_count, 0) + 1

        if col_count not in sample_rows:
            sample_rows[col_count] = df.iloc[0].values

        # pandas pads rows that are shorter than the first row with NaN, so
        # every row of the frame has the same length and comparing row
        # lengths can never detect a ragged file. Compare the number of
        # populated fields in the first and last rows instead.
        # NOTE(review): a genuine NaN value in the data would also trigger
        # this warning - treat it as a hint, not proof.
        first_fields = int(df.iloc[0].notna().sum())
        last_fields = int(df.iloc[-1].notna().sum())
        if first_fields != last_fields:
            print(f"Warning: {file_path} column count mismatch - start: {first_fields}, end: {last_fields}")

    if not column_counts:
        # Nothing was found or nothing could be parsed: there is no
        # "most common" column count to base a recommendation on.
        print("\nNo readable files found; cannot recommend a configuration.")
        return

    print("\nColumn count statistics:")
    for col_count, freq in sorted(column_counts.items()):
        print(f"  {col_count} columns: {freq} files")

    print("\nSample rows for each column count:")
    for col_count, row in sample_rows.items():
        print(f"\n{col_count} columns sample:")
        print(row[:10])

    # Ties are resolved in favour of the column count seen first.
    most_common_cols = max(column_counts.items(), key=lambda x: x[1])[0]
    print("\nRecommended configuration:")
    print(f"NUM_JOINTS = {most_common_cols // 3}")
    print(f"CONDITION_DIM = INPUT_FRAMES * {most_common_cols}")
    print(f"TARGET_DIM = OUTPUT_FRAMES * {most_common_cols}")

if __name__ == "__main__":
    import sys

    # The dataset root may be given as the first command-line argument;
    # without one, fall back to the original hard-coded location.
    data_root = sys.argv[1] if len(sys.argv) > 1 else "D:/study/new_folder/h36m"
    check_dataset_format(data_root)
