import os
from PIL import Image
from collections import Counter
import pandas as pd
from tqdm import tqdm
import argparse

def _summarize_image_sizes(sizes, errors):
    """Build the per-image table, resolution counts and summary statistics."""
    # Fixing the column list keeps the frame well-formed (and df['size']
    # addressable) even when not a single image could be read.
    df = pd.DataFrame(sizes, columns=['path', 'width', 'height', 'size'])

    # Number of images per "WIDTHxHEIGHT" resolution, most common first.
    size_counts = df['size'].value_counts().reset_index()
    size_counts.columns = ['Resolution', 'Count']

    # Min/max/mean are undefined without any image; report None instead of
    # relying on pandas' behaviour for empty object-dtype columns.
    has_images = not df.empty
    stats = {
        'Total Images': len(sizes),
        'Unique Sizes': len(size_counts),
        'Failed Images': len(errors),
        'Min Width': df['width'].min() if has_images else None,
        'Max Width': df['width'].max() if has_images else None,
        'Min Height': df['height'].min() if has_images else None,
        'Max Height': df['height'].max() if has_images else None,
        'Mean Width': df['width'].mean() if has_images else None,
        'Mean Height': df['height'].mean() if has_images else None,
    }

    return df, size_counts, stats, errors


def analyze_image_sizes(base_path: str) -> tuple:
    """
    Collect the pixel dimensions of every ``.png`` file under ``<base_path>/Cover``.

    Each sub-directory of ``Cover`` is treated as one journal and walked
    recursively. Files that cannot be opened as images are recorded in the
    error list instead of aborting the scan.

    Args:
        base_path: Root directory that is expected to contain a ``Cover`` folder.

    Returns:
        A 4-tuple ``(df, size_counts, stats, errors)``:
            df: DataFrame with one row per readable image and the columns
                ``path``, ``width``, ``height``, ``size`` ("WIDTHxHEIGHT").
            size_counts: DataFrame with the columns ``Resolution`` and ``Count``.
            stats: dict of summary values (totals, min/max/mean width and
                height). The min/max/mean entries are None when no image
                was read.
            errors: list of ``{'path', 'error'}`` dicts for unreadable files.
        The same (empty) structure is returned when the ``Cover`` folder is
        missing, so callers can always unpack four values.
    """
    sizes = []
    errors = []

    cover_path = os.path.join(base_path, "Cover")
    if not os.path.isdir(cover_path):
        print(f"错误: Cover文件夹不存在于 {base_path}")
        return _summarize_image_sizes(sizes, errors)

    # Every directory directly below Cover is one journal.
    for journal_name in tqdm(os.listdir(cover_path)):
        journal_path = os.path.join(cover_path, journal_name)
        if not os.path.isdir(journal_path):
            continue
        print(f"处理期刊: {journal_name}")

        for root, _dirs, files in os.walk(journal_path):
            for file_name in files:
                # Only PNG files are analysed.
                if not file_name.endswith('.png'):
                    continue

                # Built outside the try so the error record can always name it.
                file_path = os.path.join(root, file_name)
                try:
                    with Image.open(file_path) as img:
                        width, height = img.size
                except Exception as e:
                    # Best-effort scan: remember the failure and keep going.
                    errors.append({
                        'path': file_path,
                        'error': str(e)
                    })
                else:
                    sizes.append({
                        'path': file_path,
                        'width': width,
                        'height': height,
                        'size': f"{width}x{height}"
                    })

    return _summarize_image_sizes(sizes, errors)

def save_results(df, size_counts, stats, errors, output_dir):
    """
    Write the analysis results as CSV files into ``output_dir``.

    The directory is created if it does not exist. Files written:
        all_images.csv        - one row per analysed image (``df``)
        size_statistics.csv   - resolution counts (``size_counts``)
        basic_statistics.csv  - the ``stats`` dict as a single row
        errors.csv            - failed files, only when ``errors`` is non-empty
    """
    os.makedirs(output_dir, exist_ok=True)

    def write_csv(frame, filename):
        # All outputs share the same layout: no index column.
        frame.to_csv(os.path.join(output_dir, filename), index=False)

    write_csv(df, 'all_images.csv')
    write_csv(size_counts, 'size_statistics.csv')
    write_csv(pd.DataFrame([stats]), 'basic_statistics.csv')

    # Nothing failed, so no error report is produced.
    if not errors:
        return
    write_csv(pd.DataFrame(errors), 'errors.csv')

def main():
    """
    Command-line entry point.

    Parses ``--input`` (required image root) and ``--output`` (result
    directory, default ``./image_stats``), runs the size analysis, prints a
    summary to stdout and stores the CSV reports.
    """
    cli = argparse.ArgumentParser(description="分析图片尺寸统计")
    cli.add_argument('--input', type=str, required=True, help="图片目录路径")
    cli.add_argument('--output', type=str, default="./image_stats", help="输出目录路径")
    options = cli.parse_args()

    print(f"开始分析目录: {options.input}")
    results = analyze_image_sizes(options.input)
    df, size_counts, stats, errors = results

    # Summary statistics, one "name: value" line each.
    print("\n=== 基本统计信息 ===")
    for label in stats:
        print(f"{label}: {stats[label]}")

    # The first rows of the resolution table, i.e. the most frequent sizes.
    print("\n=== 最常见的图片尺寸 ===")
    top_sizes = size_counts.head()
    print(top_sizes.to_string(index=False))

    # Persist everything as CSV.
    save_results(df, size_counts, stats, errors, options.output)
    print(f"\n结果已保存到: {options.output}")

    failed = len(errors) if errors else 0
    if failed:
        print(f"\n警告: {failed} 个图片处理失败，详见 errors.csv")

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
