import os
import glob
from tqdm import tqdm
from PIL import Image
import numpy as np

def create_npz_from_sample_folder(sample_dir, num=50_000):
    """
    Build a single .npz file from a folder of .png samples.

    Reads the files ``{sample_dir}/000000.png`` through
    ``{sample_dir}/{num - 1:06d}.png``, stacks them into one uint8 array,
    shuffles that array along its first axis and saves it next to the folder
    as ``{sample_dir}.npz`` under the key ``arr_0``.

    Args:
        sample_dir: Folder holding the numbered .png samples.
        num: Number of samples to read, starting at index 0.

    Returns:
        The path of the written .npz file.

    Raises:
        AssertionError: If the stacked array is not of shape (num, H, W, 3).
    """
    samples = []
    for i in tqdm(range(num), desc=f"Building .npz file from samples in {sample_dir}"):
        # Release each file handle deterministically instead of relying on
        # garbage collection; the pixel data is copied out by astype() before
        # the image is closed.
        with Image.open(f"{sample_dir}/{i:06d}.png") as sample_pil:
            sample_np = np.asarray(sample_pil).astype(np.uint8)
        samples.append(sample_np)
    samples = np.stack(samples)
    # Shuffle along the first axis so the saved order differs from file order.
    samples = np.random.permutation(samples)
    # Effectively checks that the result is 4-D with 3 channels in the last axis.
    assert samples.shape == (num, samples.shape[1], samples.shape[2], 3), (
        f"Expected samples of shape ({num}, H, W, 3), got {samples.shape}"
    )
    npz_path = f"{sample_dir}.npz"
    np.savez(npz_path, arr_0=samples)
    print(f"Saved .npz file to {npz_path} [shape={samples.shape}].")
    return npz_path

def process_folders(main_dir, num=50_000):
    """
    Build a .npz file for every eligible sample folder directly under main_dir.

    An entry of ``main_dir`` is skipped (with a printed message) when it is
    not a directory, when a sibling ``<folder>.npz`` already exists, or when
    it holds fewer than ``num`` ``*.png`` files. Every remaining folder is
    passed to ``create_npz_from_sample_folder``.

    Args:
        main_dir: Directory whose immediate sub-directories are inspected.
        num: Minimum number of PNG files a folder must contain; also the
            number of samples packed into each .npz file.
    """
    # Walk the entries of main_dir in sorted order so runs are reproducible.
    for folder in sorted(os.listdir(main_dir)):
        folder_path = os.path.join(main_dir, folder)
        if not os.path.isdir(folder_path):
            print(f"Skipping {folder_path}, it is not a directory.")
            continue

        # An existing "<folder>.npz" means this folder was already processed.
        npz_path = f"{folder_path}.npz"
        if os.path.exists(npz_path):
            print(f"Skipping {folder_path}, npz file already exists.")
            continue

        # Count the PNG files in the folder. The folder path is escaped so
        # that characters such as "[" in its name are matched literally.
        png_files = glob.glob(os.path.join(glob.escape(folder_path), '*.png'))
        if len(png_files) < num:
            print(f"Skipping {folder_path}, it contains fewer than {num} PNG images.")
            continue

        print(f"Processing folder: {folder_path}")
        create_npz_from_sample_folder(folder_path, num=num)

# Run the conversion over the configured samples directory.
process_folders(main_dir='pathxxxx/xxxxxx/git/LlamaGen/samples')  # replace the placeholder path with your actual directory path
