import os
import json

# Root directory of the v4 dataset. Every immediate subdirectory is treated as
# one texture attribute, and the image files inside it as samples of it.
root_dir = '/fs/scratch/PAS2099/Jiacheng/Texture_tmp/merged_dataset_v4'

# Output JSON file
output_path = '/fs/scratch/PAS2099/Jiacheng/Texture_tmp/output/merged_data_v4_metadata.json'

# Supported image extensions (matched against the lower-cased file suffix).
# NOTE(review): '.tif' is not listed, so such files are skipped -- confirm
# whether that is intentional.
valid_exts = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff'}


def _collect_entries(dataset_root, extensions):
    """Return one metadata record per image file below *dataset_root*.

    Only the first directory level (attribute folders) and the files directly
    inside each of them are inspected; deeper nesting is ignored.

    Args:
        dataset_root: Directory whose subdirectories are attribute folders.
        extensions: Collection of accepted lower-case suffixes, each
            including the leading dot (e.g. ``'.png'``).

    Returns:
        A list of dicts with the keys ``"texture attribute"``, ``"image"``
        and ``"base_dataset"``, ordered by attribute name and then by file
        name so that repeated runs produce identical output.

    Raises:
        OSError: If *dataset_root* or one of its subdirectories cannot be
            listed (for example because it does not exist).
    """
    records = []

    # Directory listings come back in arbitrary order; sorting keeps the
    # generated JSON reproducible across runs and filesystems.
    for attribute in sorted(os.listdir(dataset_root)):
        attr_path = os.path.join(dataset_root, attribute)
        if not os.path.isdir(attr_path):
            continue

        # Keep regular files only, so that a subdirectory or dangling symlink
        # whose name merely ends in an image suffix is not recorded.
        with os.scandir(attr_path) as scan:
            file_names = sorted(item.name for item in scan if item.is_file())

        for fname in file_names:
            ext = os.path.splitext(fname)[1].lower()
            if ext not in extensions:
                continue

            # Base dataset is the text before the first underscore.
            # NOTE(review): a name without any underscore yields the whole
            # file name (extension included) -- confirm this is acceptable.
            base_dataset = fname.partition('_')[0]

            records.append({
                "texture attribute": attribute,
                "image": fname,
                "base_dataset": base_dataset,
            })

    return records


entries = _collect_entries(root_dir, valid_exts)

# Write JSON (the output directory is created on demand)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(entries, f, indent=2)

print(f"Wrote metadata for {len(entries)} images to {output_path}")
