import os
import json
import argparse
from tqdm import tqdm


# Maps the lower-cased "gender" attribute to its integer class label.
_GENDER_LABELS = {"male": 0, "female": 1}


def _gender_label(record):
    """Return the class label for one annotation record, or None if unknown.

    The label is read from ``record["faceAttributes"]["gender"]`` and compared
    case-insensitively: "male" -> 0, "female" -> 1. A missing or null
    ``faceAttributes``, a missing or non-string ``gender``, or any other
    gender value yields None.
    """
    attributes = record.get("faceAttributes")
    if not isinstance(attributes, dict):
        return None
    gender = attributes.get("gender")
    if not isinstance(gender, str):
        return None
    return _GENDER_LABELS.get(gender.lower())


def main(json_dir: str, output_path: str, num_files: int = 70001) -> None:
    """Build a labels file from per-image JSON annotation files.

    For every index ``i`` in ``range(num_files)`` the file
    ``<json_dir>/<i as 5 digits>.json`` is read. When a gender label can be
    extracted, the pair ``["<folder>/<index>.png", label]`` is recorded, where
    ``<folder>`` is the index rounded down to a multiple of 1000 (5 digits).
    The result is written to ``output_path`` as ``{"labels": [...]}``.

    Processing is best effort: files that cannot be read or parsed, and
    records that are not JSON objects, are reported on stdout and skipped.
    Empty lists and unknown genders are skipped silently.

    Args:
        json_dir: Directory containing the numbered ``.json`` files.
        output_path: Destination path of the generated JSON file.
        num_files: Number of consecutive indices to scan, starting at 0.
    """
    labels = []
    for i in tqdm(range(num_files)):
        idx_str = f"{i:05d}"
        json_file = os.path.join(json_dir, f"{idx_str}.json")

        # Only the load can fail for reasons outside our control, so the
        # try body is limited to it. JSONDecodeError and UnicodeDecodeError
        # are both ValueError subclasses.
        try:
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError, RecursionError) as e:
            print(f"Error processing {json_file}: {e}")
            continue

        # If the file holds a list, use its first element.
        if isinstance(data, list):
            if not data:
                continue  # Skip empty files
            data = data[0]

        if not isinstance(data, dict):
            print(
                f"Error processing {json_file}: "
                f"expected a JSON object, got {type(data).__name__}"
            )
            continue

        label = _gender_label(data)
        if label is None:
            continue  # Skip unknown genders

        # Images are grouped in folders of 1000: 00000/, 01000/, 02000/, ...
        n = (i // 1000) * 1000
        labels.append([f"{n:05d}/{idx_str}.png", label])

    output = {"labels": labels}
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=4)


if __name__ == "__main__":
    # Command-line entry point: both options are mandatory string paths,
    # so they are declared from a small (flag, help text) table.
    option_help = (
        ("--json_dir", "Path to directory containing 00000.json to 70000.json"),
        ("--output", "Output path for dataset.json"),
    )

    cli = argparse.ArgumentParser()
    for flag, description in option_help:
        cli.add_argument(flag, type=str, required=True, help=description)

    options = cli.parse_args()
    main(options.json_dir, options.output)
