# #%%
# import json
# from collections import defaultdict

# # === Define the 8 canonical directions ===
# DIRECTIONS = [
#     "front", "front right", "right", "back right",
#     "back", "back left", "left", "front left"
# ]

# # === Load benchmark.json ===
# with open("/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/EgoOrientBench/all_data/EgocentricDataset/train_benchmark/benchmark.json", "r") as f:
#     data = json.load(f)

# # === Build mapping structures: object → direction → image → items ===
# object_dir_to_image_to_items = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
# object_dir_image_sets = defaultdict(lambda: defaultdict(set))

# for item in data:
#     if item["type"] == "general_complex":
#         obj = item["category_name"].strip().lower()
#         label = item["label"].strip().lower()
#         image = item["image"].strip()

#         object_dir_image_sets[obj][label].add(image)
#         object_dir_to_image_to_items[obj][label][image].append(item)

# # === Filter objects that appear in at least 4 directions (no image count constraints) ===
# filtered_data = []
# valid_objects = []
# all_images = set()

# for obj in object_dir_image_sets:
#     label_map = object_dir_image_sets[obj]

#     # Get list of directions that this object covers
#     available_dirs = [d for d in DIRECTIONS if d in label_map]
#     if len(available_dirs) < 4:
#         continue

#     valid_objects.append(obj)

#     # Collect all image items for each available direction
#     for direction in available_dirs:
#         for image in label_map[direction]:
#             all_images.add(image)
#             filtered_data.extend(object_dir_to_image_to_items[obj][direction][image])

# # === Save filtered results to a JSON file ===
# output_path = "/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/output/general_complex_4dir_all_images.json"
# with open(output_path, "w") as f:
#     json.dump(filtered_data, f, indent=2)

# # === Print results summary ===
# print(f"✅ Done! Saved {len(filtered_data)} entries to:\n{output_path}")
# print(f"🎯 Total valid objects (≥4 directions): {len(valid_objects)}")
# print(f"🖼️  Total number of unique images: {len(all_images)}")

# 4 options, 6-18 images per direction
# import json
# from collections import defaultdict

# # === Define the 8 canonical directions ===
# DIRECTIONS = [
#     "front", "front right", "right", "back right",
#     "back", "back left", "left", "front left"
# ]

# # === Load benchmark.json ===
# with open("/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/EgoOrientBench/all_data/EgocentricDataset/train_benchmark/benchmark.json", "r") as f:
#     data = json.load(f)

# # === Build mapping structures: object → direction → image → items ===
# object_dir_to_image_to_items = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
# object_dir_image_sets = defaultdict(lambda: defaultdict(set))

# for item in data:
#     if item["type"] == "general_complex":
#         obj = item["category_name"].strip().lower()
#         label = item["label"].strip().lower()
#         image = item["image"].strip()

#         object_dir_image_sets[obj][label].add(image)
#         object_dir_to_image_to_items[obj][label][image].append(item)

# # === Filter: at least 4 directions, each direction has 6~18 images (truncate if >18)
# filtered_data = []
# valid_objects = []
# all_images = set()

# for obj in object_dir_image_sets:
#     label_map = object_dir_image_sets[obj]

#     # Get list of valid directions for this object
#     available_dirs = [d for d in DIRECTIONS if d in label_map and len(label_map[d]) >= 6]
#     if len(available_dirs) < 4:
#         continue

#     valid_objects.append(obj)

#     for direction in available_dirs:
#         image_list = list(label_map[direction])
#         if len(image_list) > 18:
#             image_list = image_list[:18]  # or use random.sample(image_list, 18)

#         for image in image_list:
#             all_images.add(image)
#             filtered_data.extend(object_dir_to_image_to_items[obj][direction][image])

# # === Save filtered results to JSON file
# output_path = "/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/output/general_complex_4dir_6to18perdir.json"
# with open(output_path, "w") as f:
#     json.dump(filtered_data, f, indent=2)

# # === Print result summary
# print(f"✅ Done! Saved {len(filtered_data)} entries to:\n{output_path}")
# print(f"🎯 Total valid objects (≥4 directions, each with 6–18 images): {len(valid_objects)}")
# print(f"🖼️  Total unique images: {len(all_images)}")

# 4 options, 6-24 images per direction
# import json
# from collections import defaultdict

# # === Define the 8 canonical directions ===
# DIRECTIONS = [
#     "front", "front right", "right", "back right",
#     "back", "back left", "left", "front left"
# ]

# # === Load benchmark.json ===
# with open("/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/EgoOrientBench/all_data/EgocentricDataset/train_benchmark/benchmark.json", "r") as f:
#     data = json.load(f)

# # === Build mapping structures: object → direction → image → items ===
# object_dir_to_image_to_items = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
# object_dir_image_sets = defaultdict(lambda: defaultdict(set))

# for item in data:
#     if item["type"] == "general_complex":
#         obj = item["category_name"].strip().lower()
#         label = item["label"].strip().lower()
#         image = item["image"].strip()

#         object_dir_image_sets[obj][label].add(image)
#         object_dir_to_image_to_items[obj][label][image].append(item)

# # === Filter: at least 4 directions, each direction has 6–24 images (truncate if >24)
# filtered_data = []
# valid_objects = []
# all_images = set()

# for obj in object_dir_image_sets:
#     label_map = object_dir_image_sets[obj]

#     # Get list of valid directions for this object
#     available_dirs = [d for d in DIRECTIONS if d in label_map and len(label_map[d]) >= 6]
#     if len(available_dirs) < 4:
#         continue

#     valid_objects.append(obj)

#     for direction in available_dirs:
#         image_list = list(label_map[direction])
#         if len(image_list) > 24:
#             image_list = image_list[:24]  # or use random.sample(image_list, 24)

#         for image in image_list:
#             all_images.add(image)
#             filtered_data.extend(object_dir_to_image_to_items[obj][direction][image])

# # === Save filtered results to JSON file
# output_path = "/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/output/general_complex_4dir_6to24perdir.json"
# with open(output_path, "w") as f:
#     json.dump(filtered_data, f, indent=2)

# # === Print result summary
# print(f"✅ Done! Saved {len(filtered_data)} entries to:\n{output_path}")
# print(f"🎯 Total valid objects (≥4 directions, each with 6–24 images): {len(valid_objects)}")
# print(f"🖼️  Total unique images: {len(all_images)}")

# 4 options, 4-16 images per direction
# import json
# from collections import defaultdict

# # === Define the 8 canonical directions ===
# DIRECTIONS = [
#     "front", "front right", "right", "back right",
#     "back", "back left", "left", "front left"
# ]

# # === Load benchmark.json ===
# with open("/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/EgoOrientBench/all_data/EgocentricDataset/train_benchmark/benchmark.json", "r") as f:
#     data = json.load(f)

# # === Build mapping structures: object → direction → image → items ===
# object_dir_to_image_to_items = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
# object_dir_image_sets = defaultdict(lambda: defaultdict(set))

# for item in data:
#     if item["type"] == "general_complex":
#         obj = item["category_name"].strip().lower()
#         label = item["label"].strip().lower()
#         image = item["image"].strip()

#         object_dir_image_sets[obj][label].add(image)
#         object_dir_to_image_to_items[obj][label][image].append(item)

# # === Filter: at least 4 directions, each direction has 4–16 images (truncate if >16)
# filtered_data = []
# valid_objects = []
# all_images = set()

# for obj in object_dir_image_sets:
#     label_map = object_dir_image_sets[obj]

#     # Get list of valid directions for this object
#     available_dirs = [d for d in DIRECTIONS if d in label_map and len(label_map[d]) >= 4]
#     if len(available_dirs) < 4:
#         continue

#     valid_objects.append(obj)

#     for direction in available_dirs:
#         image_list = list(label_map[direction])
#         if len(image_list) > 16:
#             image_list = image_list[:16]  # or use random.sample(image_list, 16)

#         for image in image_list:
#             all_images.add(image)
#             filtered_data.extend(object_dir_to_image_to_items[obj][direction][image])

# # === Save filtered results to JSON file
# output_path = "/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/output/general_complex_4dir_4to16perdir.json"
# with open(output_path, "w") as f:
#     json.dump(filtered_data, f, indent=2)

# # === Print result summary
# print(f"✅ Done! Saved {len(filtered_data)} entries to:\n{output_path}")
# print(f"🎯 Total valid objects (≥4 directions, each with 4–16 images): {len(valid_objects)}")
# print(f"🖼️  Total unique images: {len(all_images)}")

# 4 options, 10-30 images per direction
# import json
# from collections import defaultdict

# # === Define the 8 canonical directions ===
# DIRECTIONS = [
#     "front", "front right", "right", "back right",
#     "back", "back left", "left", "front left"
# ]

# # === Load benchmark.json ===
# with open("/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/EgoOrientBench/all_data/EgocentricDataset/train_benchmark/benchmark.json", "r") as f:
#     data = json.load(f)

# # === Build mapping structures: object → direction → image → items ===
# object_dir_to_image_to_items = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
# object_dir_image_sets = defaultdict(lambda: defaultdict(set))

# for item in data:
#     if item["type"] == "general_complex":
#         obj = item["category_name"].strip().lower()
#         label = item["label"].strip().lower()
#         image = item["image"].strip()

#         object_dir_image_sets[obj][label].add(image)
#         object_dir_to_image_to_items[obj][label][image].append(item)

# # === Filter: at least 4 directions, each direction has 10–30 images (truncate if >30)
# filtered_data = []
# valid_objects = []
# all_images = set()

# for obj in object_dir_image_sets:
#     label_map = object_dir_image_sets[obj]

#     # Get list of valid directions for this object
#     available_dirs = [d for d in DIRECTIONS if d in label_map and len(label_map[d]) >= 10]
#     if len(available_dirs) < 4:
#         continue

#     valid_objects.append(obj)

#     for direction in available_dirs:
#         image_list = list(label_map[direction])
#         if len(image_list) > 30:
#             image_list = image_list[:30]  # or use random.sample(image_list, 30)

#         for image in image_list:
#             all_images.add(image)
#             filtered_data.extend(object_dir_to_image_to_items[obj][direction][image])

# # === Save filtered results to JSON file
# output_path = "/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/output/general_complex_4dir_10to30perdir.json"
# with open(output_path, "w") as f:
#     json.dump(filtered_data, f, indent=2)

# # === Print result summary
# print(f"✅ Done! Saved {len(filtered_data)} entries to:\n{output_path}")
# print(f"🎯 Total valid objects (≥4 directions, each with 10–30 images): {len(valid_objects)}")
# print(f"🖼️  Total unique images: {len(all_images)}")


# import json
# from collections import defaultdict

# # === Define the 8 canonical directions ===
# DIRECTIONS = [
#     "front", "front right", "right", "back right",
#     "back", "back left", "left", "front left"
# ]

# # === Load benchmark.json ===
# with open("/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/EgoOrientBench/all_data/EgocentricDataset/train_benchmark/benchmark.json", "r") as f:
#     data = json.load(f)

# # === Build mapping structures: object → direction → image → items ===
# object_dir_to_image_to_items = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
# object_dir_image_sets = defaultdict(lambda: defaultdict(set))

# for item in data:
#     if item["type"] == "general_complex":
#         obj = item["category_name"].strip().lower()
#         label = item["label"].strip().lower()
#         image = item["image"].strip()

#         object_dir_image_sets[obj][label].add(image)
#         object_dir_to_image_to_items[obj][label][image].append(item)

# # === Filter: at least 4 directions, each direction has 10-40 images (truncate if >40)
# filtered_data = []
# valid_objects = []
# all_images = set()

# for obj in object_dir_image_sets:
#     label_map = object_dir_image_sets[obj]

#     # Get list of valid directions for this object
#     available_dirs = [d for d in DIRECTIONS if d in label_map and len(label_map[d]) >= 10]
#     if len(available_dirs) < 4:
#         continue

#     valid_objects.append(obj)

#     for direction in available_dirs:
#         image_list = list(label_map[direction])
#         if len(image_list) > 40:
#             image_list = image_list[:40]  # or use random.sample(image_list, 40)

#         for image in image_list:
#             all_images.add(image)
#             filtered_data.extend(object_dir_to_image_to_items[obj][direction][image])

# # === Save filtered results to JSON file
# output_path = "/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/output/general_complex_4dir_10to40perdir.json"
# with open(output_path, "w") as f:
#     json.dump(filtered_data, f, indent=2)

# # === Print result summary
# print(f"✅ Done! Saved {len(filtered_data)} entries to:\n{output_path}")
# print(f"🎯 Total valid objects (≥4 directions, each with 10–40 images): {len(valid_objects)}")
# print(f"🖼️  Total unique images: {len(all_images)}")

import json
from collections import defaultdict

# === Define the 8 canonical directions ===
DIRECTIONS = [
    "front", "front right", "right", "back right",
    "back", "back left", "left", "front left"
]

# === Filtering configuration ===
INPUT_PATH = "/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/output/combined_general_complex_fixed.json"
OUTPUT_PATH = "/fs/scratch/PAS2099/Jiacheng/EgoOrientBench/output/general_complex_combines_4dir_10to50perdir.json"
TARGET_TYPE = "general_complex"   # only items whose "type" equals this are kept
MIN_DIRECTIONS = 4                # an object needs at least this many qualifying directions
MIN_IMAGES_PER_DIR = 10           # a direction qualifies only with at least this many unique images
MAX_IMAGES_PER_DIR = 50           # directions with more unique images are truncated to this many


def build_direction_index(data, item_type=TARGET_TYPE):
    """Group benchmark items by object, direction label and image.

    Args:
        data: Iterable of dicts, each providing the keys "type",
            "category_name", "label" and "image".
        item_type: Only items whose "type" equals this value are indexed.

    Returns:
        A tuple ``(object_dir_image_sets, object_dir_to_image_to_items)``:
        the first maps object -> direction -> set of image names, the second
        maps object -> direction -> image name -> list of original items.
        Object and direction keys are stripped and lower-cased; image names
        are stripped only.

    Raises:
        KeyError: If an item lacks "type", or a matching item lacks
            "category_name", "label" or "image".
    """
    object_dir_to_image_to_items = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    object_dir_image_sets = defaultdict(lambda: defaultdict(set))

    for item in data:
        if item["type"] != item_type:
            continue

        obj = item["category_name"].strip().lower()
        label = item["label"].strip().lower()
        image = item["image"].strip()

        object_dir_image_sets[obj][label].add(image)
        object_dir_to_image_to_items[obj][label][image].append(item)

    return object_dir_image_sets, object_dir_to_image_to_items


def select_balanced_items(
    object_dir_image_sets,
    object_dir_to_image_to_items,
    min_dirs=MIN_DIRECTIONS,
    min_images=MIN_IMAGES_PER_DIR,
    max_images=MAX_IMAGES_PER_DIR,
):
    """Select the items of objects that are covered from enough directions.

    A direction qualifies for an object when it is one of ``DIRECTIONS`` and
    has at least ``min_images`` unique images. Objects with fewer than
    ``min_dirs`` qualifying directions are dropped. For each kept object,
    at most ``max_images`` images per qualifying direction are used.

    Args:
        object_dir_image_sets: object -> direction -> set of image names.
        object_dir_to_image_to_items: object -> direction -> image -> items.
        min_dirs: Minimum number of qualifying directions per object.
        min_images: Minimum unique images for a direction to qualify.
        max_images: Maximum unique images taken per direction.

    Returns:
        A tuple ``(filtered_data, valid_objects, all_images)``: the selected
        items as a list, the kept object names as a list, and the set of
        image names that were selected.
    """
    filtered_data = []
    valid_objects = []
    all_images = set()

    for obj, label_map in object_dir_image_sets.items():
        # Membership is tested before indexing so the defaultdict does not
        # grow empty entries for directions the object never had.
        available_dirs = [
            d for d in DIRECTIONS
            if d in label_map and len(label_map[d]) >= min_images
        ]
        if len(available_dirs) < min_dirs:
            continue

        valid_objects.append(obj)

        for direction in available_dirs:
            # Sort before truncating: iteration order of a set of strings
            # changes between interpreter runs (hash randomization), so an
            # unsorted slice would keep a different subset on every run.
            image_list = sorted(label_map[direction])[:max_images]

            for image in image_list:
                all_images.add(image)
                filtered_data.extend(object_dir_to_image_to_items[obj][direction][image])

    return filtered_data, valid_objects, all_images


if __name__ == "__main__":
    # === Load the combined general_complex dataset ===
    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)

    # === Build mapping structures: object → direction → image → items ===
    object_dir_image_sets, object_dir_to_image_to_items = build_direction_index(data)

    # === Filter: at least 4 directions, each direction has 10-50 images (truncate if >50)
    filtered_data, valid_objects, all_images = select_balanced_items(
        object_dir_image_sets, object_dir_to_image_to_items
    )

    # === Save filtered results to JSON file
    output_path = OUTPUT_PATH
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(filtered_data, f, indent=2)

    # === Print result summary
    print(f"✅ Done! Saved {len(filtered_data)} entries to:\n{output_path}")
    print(f"🎯 Total valid objects (≥{MIN_DIRECTIONS} directions, each with {MIN_IMAGES_PER_DIR}–{MAX_IMAGES_PER_DIR} images): {len(valid_objects)}")
    print(f"🖼️  Total unique images: {len(all_images)}")