import os
import json

# — Configuration —
# Every path below is a hard-coded absolute location; edit DATA_DIR and
# OUTPUT_DIR to run against a different copy of the data.
# Base data directory (all input paths are built from it)
DATA_DIR       = "/fs/scratch/PAS2099/Jiacheng/place365/data"
# Output directory for JSON
OUTPUT_DIR     = "/fs/scratch/PAS2099/Jiacheng/place365/output"
# Make sure the output directory exists.
# NOTE: this executes at import time, so importing this module (not only
# running it as a script) creates the directory.
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Folder containing validation images
VAL_IMG_DIR    = os.path.join(DATA_DIR, "val_large")
# Mapping file: one whitespace-separated "<category string> <index>" pair per line
CAT_FILE       = os.path.join(DATA_DIR, "categories_places365.txt")
# Validation list: one whitespace-separated "<image filename> <numeric label>" pair per line
VAL_LIST_FILE  = os.path.join(DATA_DIR, "places365_val.txt")
# Output JSON path
OUT_JSON       = os.path.join(OUTPUT_DIR, "places365_val_data.json")
# — End Configuration —

# File-name suffixes (compared case-insensitively) treated as images.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


def _read_pairs(path):
    """Yield ``(first, second)`` string pairs from a two-column text file.

    Lines are split on whitespace. Blank lines and lines that do not have
    exactly two fields are skipped, so the category file and the validation
    list are parsed with the same tolerance for malformed rows.
    """
    # Explicit UTF-8 so parsing does not depend on the process locale.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            parts = line.split()
            if len(parts) == 2:
                yield parts[0], parts[1]


def _clean_category(raw_cat):
    """Strip the leading ``/<letter>/`` prefix from a raw category string.

    Example: ``"/g/greenhouse/indoor"`` -> ``"greenhouse/indoor"``.
    A category with no second path component (e.g. ``"/plaza"``) is returned
    with only its leading slashes removed instead of raising ``IndexError``.
    """
    stripped = raw_cat.lstrip("/")
    _prefix, sep, rest = stripped.partition("/")
    return rest if sep else stripped


def _build_records(img_dir, val2idx, idx2cat):
    """Return the JSON records for the labelled images found in *img_dir*.

    Directory entries are visited in sorted order. An entry is skipped when
    its name does not end with an image extension, when it is not listed in
    *val2idx*, or when its label index has no entry in *idx2cat*.
    """
    records = []
    for fname in sorted(os.listdir(img_dir)):
        if not fname.lower().endswith(_IMAGE_EXTENSIONS):
            continue
        if fname not in val2idx:
            continue
        raw_cat = idx2cat.get(val2idx[fname])
        if raw_cat is None:
            continue
        records.append({
            "image_name": fname,
            "label": _clean_category(raw_cat),
        })
    return records


def main():
    """Build the validation-set JSON manifest and write it to ``OUT_JSON``.

    Reads the index -> category mapping from ``CAT_FILE`` and the
    filename -> label-index mapping from ``VAL_LIST_FILE``, walks
    ``VAL_IMG_DIR``, and writes a JSON list of
    ``{"image_name": ..., "label": ...}`` objects, then prints a summary.

    Raises:
        ValueError: if an index/label field in either list file is not an
            integer.
        OSError: if an input cannot be read or the output cannot be written.
    """
    # 1. index -> raw category string (e.g. "/g/greenhouse/indoor")
    idx2cat = {int(idx_str): raw_cat
               for raw_cat, idx_str in _read_pairs(CAT_FILE)}

    # 2. image filename -> numeric label index
    val2idx = {filename: int(lbl_str)
               for filename, lbl_str in _read_pairs(VAL_LIST_FILE)}

    # 3. One record per labelled image present on disk
    records = _build_records(VAL_IMG_DIR, val2idx, idx2cat)

    # 4. ensure_ascii=False emits raw non-ASCII characters, so the file must
    #    be opened as UTF-8 rather than with the locale's default encoding.
    with open(OUT_JSON, "w", encoding="utf-8") as out_f:
        json.dump(records, out_f, indent=2, ensure_ascii=False)

    print(f"Generated JSON with {len(records)} entries at:\n  {OUT_JSON}")

# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
