import pandas as pd
import re
from collections import defaultdict

# Read the input table; the relative path is resolved against the current working directory
metadata_csv = "bbox_metadata_filtered.csv"
df = pd.read_csv(metadata_csv)

# Step 1: Parse depth_bin text such as "(0, 1]" into numeric (lower, upper) bounds; unparseable values become None
# One bound of an interval label: optional sign, digits, optional decimal part.
_BIN_NUMBER = r"[+-]?\d+(?:\.\d+)?"
# Left-open / right-closed interval label, e.g. "(0, 1]" or "(-0.001, 1.0]".
_BIN_PATTERN = re.compile(
    rf"\s*\(\s*({_BIN_NUMBER})\s*,\s*({_BIN_NUMBER})\s*\]"
)


def _to_number(text):
    """Return an int for integer text and a float when a decimal point is present."""
    return float(text) if "." in text else int(text)


def parse_bin(bin_str):
    """Parse an interval label of the form "(low, high]" into its two bounds.

    Args:
        bin_str: Interval text such as "(0, 1]". Bounds may carry a sign and a
            decimal part; whitespace around the bounds is tolerated. Any text
            after the closing bracket is ignored.

    Returns:
        A ``(low, high)`` tuple, or ``None`` when ``bin_str`` is not a string
        or does not start with a "(low, high]" label. Bounds written without a
        decimal point are returned as ``int`` (so "(0, 1]" gives ``(0, 1)``);
        bounds with a decimal point are returned as ``float``.
    """
    # Missing cells arrive as float NaN; re.match would raise TypeError on them.
    if not isinstance(bin_str, str):
        return None
    match = _BIN_PATTERN.match(bin_str)
    if match is None:
        return None
    return _to_number(match.group(1)), _to_number(match.group(2))

# Cast each cell to text before parsing, so a missing value (NaN) becomes "nan" and maps to None.
# NOTE(review): depth_range is not read again in the visible part of this script.
df["depth_range"] = df["depth_bin"].map(lambda cell: parse_bin(str(cell)))

# Step 2: Collect the distinct depth_bin values observed for each label_id
label_to_bins = defaultdict(set)
for label_key, bin_text in zip(df["label_id"], df["depth_bin"]):
    label_to_bins[label_key].add(bin_text)

# Step 3: Sort depth bins per label and assign lettered options (A, B, C, ...)
# Result shape: {label_id: {raw depth_bin value: "<letter>. <low>-<high>"}}
label_to_options = {}
for label_id, bins in label_to_bins.items():
    # Parse every bin exactly once so the sort key and the option text share one result.
    parsed_bins = []
    for bin_val in bins:
        bounds = parse_bin(bin_val)
        if bounds is None:
            # Name the offending value instead of failing later with
            # "'NoneType' object is not subscriptable".
            raise ValueError(
                f"Cannot parse depth_bin {bin_val!r} for label_id {label_id!r}"
            )
        parsed_bins.append((bin_val, bounds))

    # Order by the lower bound only; the sort is stable, so ties keep set order.
    parsed_bins.sort(key=lambda item: item[1][0])

    options = {}
    for idx, (bin_val, (low, high)) in enumerate(parsed_bins):
        # Letters run consecutively from "A" in sorted order.
        letter = chr(ord("A") + idx)
        options[bin_val] = f"{letter}. {low}-{high}"
    label_to_options[label_id] = options

# Step 4: Build one multiple-choice question per input row, plus its correct option
questions = []
row_fields = zip(
    df["label_id"],
    df["label_name"],
    df["new_image_id"],
    df["old_image_id"],
    df["depth_bin"],
)
for label_id, label_name, new_image_id, old_image_id, depth_bin in row_fields:
    # Option texts for this label, keyed by the raw depth_bin value
    options_for_label = label_to_options[label_id]
    option_str = ", ".join(options_for_label.values())

    question = (
        f"From the camera's perspective, estimate how far the closest point of the "
        f"{label_name} (highlighted by a red box) is from the camera in real-world distance, "
        f"in meters. Select the best answer from the options below: {option_str}"
    )

    # The correct answer is the option text assigned to this row's own depth_bin
    record = {
        "label_id": label_id,
        "label_name": label_name,
        "new_image_id": new_image_id,
        "old_image_id": old_image_id,
        "depth_bin": depth_bin,
        "question": question,
        "correct_answer": options_for_label[depth_bin],
    }
    questions.append(record)

# Step 5: Output to CSV
# Name the columns explicitly so the header row is still written when `questions`
# is empty (a DataFrame built from an empty list has no columns at all).
# The names and their order match the keys of each question record.
output_columns = [
    "label_id",
    "label_name",
    "new_image_id",
    "old_image_id",
    "depth_bin",
    "question",
    "correct_answer",
]
out_df = pd.DataFrame(questions, columns=output_columns)
out_df.to_csv("question_output.csv", index=False)

print("Saved as question_output.csv")
