#%%
import json
import math
import os

import cv2
import numpy as np

# === Config ===
# Rosetta OCR annotation files, one per split.
ocr_jsons = [
    f"textvqa_data/TextVQA_Rosetta_OCR_v0.2_{split}.json"
    for split in ("train", "val", "test")
]

# Question/answer files (only train and val are listed here).
textvqa_jsons = [
    f"textvqa_data/TextVQA_0.5.1_{split}.json"
    for split in ("train", "val")
]

# Folder key -> directory holding that folder's images.
image_dirs = {
    split: f"textvqa_data/{split}_images"
    for split in ("train", "test")
}

# Box areas are measured as if the image were resized to this (w, h), and
# boxes whose area falls below the threshold are dropped.
resize_size = (384, 384)
area_threshold = 2000

# Output layout: <output_dir>/image/*.jpg plus <output_dir>/metadata.json.
output_dir = f"filtered_bbx_{area_threshold}_output"
output_img_dir = os.path.join(output_dir, "image")
os.makedirs(output_img_dir, exist_ok=True)

# === Helper: build rotated polygon from bbox
def get_rotated_box_from_bbox(bbox, img_w, img_h):
    """Return the four pixel corners of a (possibly rotated) bounding box.

    The box is anchored at its top-left corner and rotated about that corner:
    the width edge points along ``(cos yaw, sin yaw)`` and the height edge
    along ``(-sin yaw, cos yaw)`` in image coordinates.

    Args:
        bbox: Mapping with ``top_left_x``, ``top_left_y``, ``width`` and
            ``height`` expressed as fractions of the image size (they are
            multiplied by ``img_w`` / ``img_h``), plus an optional ``yaw``
            in degrees. A missing or ``None`` yaw means no rotation.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``np.ndarray`` of shape ``(4, 2)`` and dtype ``int32`` holding the
        corners in the order top-left, top-right, bottom-right, bottom-left.
    """
    x = bbox["top_left_x"] * img_w
    y = bbox["top_left_y"] * img_h
    w = bbox["width"] * img_w
    h = bbox["height"] * img_h
    # `or 0.0` also covers an explicit ``"yaw": None`` entry.
    yaw = math.radians(float(bbox.get("yaw") or 0.0))

    cos_yaw = math.cos(yaw)
    sin_yaw = math.sin(yaw)

    # Offsets of the rotated width and height edges.
    dx_w, dy_w = w * cos_yaw, w * sin_yaw
    dx_h, dy_h = -h * sin_yaw, h * cos_yaw

    pts = [
        (x, y),
        (x + dx_w, y + dy_w),
        (x + dx_w + dx_h, y + dy_w + dy_h),
        (x + dx_h, y + dy_h),
    ]

    # Round to the nearest pixel. Truncating with int() rounds toward zero,
    # so floating-point noise from cos/sin (e.g. -24.999999999999996) would
    # land one pixel off.
    return np.array([[round(px), round(py)] for px, py in pts], dtype=np.int32)

# === Step 1: Build image_id -> answer set
# Each image_id maps to the union of the lower-cased answers of every
# question that refers to that image, across all listed question files.
imageid_to_answers = {}
for path in textvqa_jsons:
    # Pin the encoding: without it open() falls back to the platform locale,
    # which is not guaranteed to be UTF-8.
    with open(path, "r", encoding="utf-8") as f:
        questions = json.load(f)["data"]

    for item in questions:
        lowered = (a.lower() for a in item["answers"])
        imageid_to_answers.setdefault(item["image_id"], set()).update(lowered)

# === Step 2: Load OCR data
combined_ocr_data = []   # OCR entries of every listed file, in file order
image_id_to_folder = {}  # image_id -> "train" or "test"

for json_path in ocr_jsons:
    # Any file whose path does not contain "test" (val included) is tagged
    # "train"; presumably val images are stored alongside the train images.
    folder = "test" if "test" in json_path.lower() else "train"

    # Pin the encoding: without it open() falls back to the platform locale,
    # which is not guaranteed to be UTF-8.
    with open(json_path, "r", encoding="utf-8") as f:
        entries = json.load(f)["data"]

    combined_ocr_data.extend(entries)
    for item in entries:
        image_id_to_folder[item["image_id"]] = folder

# === Step 3: Filter + Save
# For every OCR word that equals one of the image's answers and whose box is
# large enough, write a copy of the image with that box outlined in red and
# add a metadata record for it.
records = []
counter = 0  # number of images written; also the filename suffix

for entry in combined_ocr_data:
    image_id = entry["image_id"]
    ocr_info = entry.get("ocr_info")
    if not ocr_info:
        continue

    # Without answers nothing can match, so skip before touching the disk.
    answers = imageid_to_answers.get(image_id)
    if not answers:
        continue

    # Select qualifying annotations first; none of these checks need the
    # pixels, so images with no qualifying box are never decoded.
    selected = []
    for ann in ocr_info:
        word = (ann.get("word") or "").strip().lower()
        if not word or word == "." or word not in answers:
            continue

        bbox = ann.get("bounding_box")
        if not bbox:
            continue

        # Area is measured at the reference resolution, not the original
        # image size, so the threshold is comparable across images.
        w = bbox["width"] * resize_size[0]
        h = bbox["height"] * resize_size[1]
        area = w * h
        if area < area_threshold:
            continue

        selected.append((word, bbox, area))

    if not selected:
        continue

    folder = image_id_to_folder.get(image_id, "train")
    image_path = os.path.join(image_dirs[folder], f"{image_id}.jpg")
    if not os.path.exists(image_path):
        continue

    img = cv2.imread(image_path)
    if img is None:  # unreadable or corrupt file
        continue

    img_h, img_w = img.shape[:2]

    for word, bbox, area in selected:
        box = get_rotated_box_from_bbox(bbox, img_w, img_h)

        # Draw on a copy so each output shows exactly one box.
        img_copy = img.copy()
        cv2.polylines(img_copy, [box], isClosed=True, color=(0, 0, 255), thickness=2)

        out_filename = f"{image_id}_{counter}.jpg"
        out_path = os.path.join(output_img_dir, out_filename)
        # imwrite reports failure through its return value; do not record
        # an image that was never written.
        if not cv2.imwrite(out_path, img_copy):
            print(f"Warning: could not write {out_path}; skipping")
            continue

        records.append({
            "image_path": f"image/{out_filename}",
            "word": word,
            "area": round(area, 2)
        })

        counter += 1

# === Save metadata
with open(os.path.join(output_dir, "metadata.json"), "w", encoding="utf-8") as f:
    json.dump(records, f, indent=2)

print(f"\n✅ Done! Total saved bbox images: {counter}")



#%%
import json
import os
import uuid
import shutil
import random

# Input produced by the filtering step and the root of the exported dataset.
# The two input paths are literals and must match that step's output folder.
input_json_path = "filtered_bbx_2000_output/metadata.json"
source_image_root = "filtered_bbx_2000_output/image"
output_root = "textVQA"
random.seed(42)  # fixed seed so the shuffle below is repeatable

# Create <output_root>/train/images and <output_root>/val/images.
train_img_dir = os.path.join(output_root, "train/images")
val_img_dir = os.path.join(output_root, "val/images")
for img_dir in (train_img_dir, val_img_dir):
    os.makedirs(img_dir, exist_ok=True)

with open(input_json_path, "r") as f:
    all_data = json.load(f)

# Shuffle in place, then cut 80% / 20% into train / val.
random.shuffle(all_data)
split_index = int(0.8 * len(all_data))
train_data, val_data = all_data[:split_index], all_data[split_index:]

# Accumulators filled by the conversion loops.
train_json, val_json, val_ans_json, missing_images = [], [], [], []

# The same question is asked about every sample; keeping it in one place
# stops the train, val and val-answer files from drifting apart.
QUESTION_PROMPT = "<image>\nwhat is written in the red bounding box in the image?"


def _copy_with_new_id(item, dest_dir):
    """Copy the record's image into ``dest_dir`` under a fresh UUID name.

    Returns ``(new_id, new_img_name)`` on success. When the source image is
    absent, its path is appended to ``missing_images`` and ``None`` is
    returned.
    """
    img_path = os.path.join(source_image_root, os.path.basename(item["image_path"]))
    if not os.path.exists(img_path):
        missing_images.append(img_path)
        return None

    new_id = str(uuid.uuid4())
    new_img_name = new_id + ".jpg"
    shutil.copy(img_path, os.path.join(dest_dir, new_img_name))
    return new_id, new_img_name


# NOTE(review): records are split per saved box, so different boxes of the
# same source image can land in both train and val — confirm this is intended.

# Train records: one two-turn conversation (question, then the word).
for item in train_data:
    copied = _copy_with_new_id(item, train_img_dir)
    if copied is None:
        continue
    new_id, new_img_name = copied

    train_json.append({
        "id": new_id,
        # Literal prefix; it matches output_root/train/images as configured.
        "image": f"textVQA/train/images/{new_img_name}",
        "conversations": [
            {
                "from": "human",
                "value": QUESTION_PROMPT
            },
            {
                "from": "gpt",
                "value": item["word"]
            }
        ]
    })

# Val records: the question goes to val_json, the expected word to
# val_ans_json, linked by question_id.
for item in val_data:
    copied = _copy_with_new_id(item, val_img_dir)
    if copied is None:
        continue
    new_id, new_img_name = copied

    val_json.append({
        "question_id": new_id,
        "image": new_img_name,
        "category": "default",
        "text": QUESTION_PROMPT,
        "id": new_id
    })
    val_ans_json.append({
        "question_id": new_id,
        "prompt": QUESTION_PROMPT,
        "text": item["word"],
        "answer_id": None,
        "model_id": None,
        "metadata": {}
    })

# Write the three JSON files under output_root.
for rel_path, payload in (
    ("train/train.json", train_json),
    ("val/val.json", val_json),
    ("val/val_ans.json", val_ans_json),
):
    with open(os.path.join(output_root, rel_path), "w") as f:
        json.dump(payload, f, indent=2)

# The report is only created when at least one source image was absent.
if missing_images:
    with open("missing_images.txt", "w") as f:
        f.writelines(missing + "\n" for missing in missing_images)
