#%%
import scipy.io
import json
import os

# MATLAB annotation file to read and the JSON file that will be written from it.
# NOTE(review): only the test split is converted here, while later cells also
# read "trainCharBound.json" — presumably this cell is re-run with the train
# paths; confirm.
mat_file_path, output_json_path = (
    "IIIT5K/IIIT5K/testCharBound.mat",
    "IIIT5K/IIIT5K/testCharBound.json",
)

# The loaded mapping is iterated by key below; keys starting with "__" are
# treated as non-data entries and dropped there.
mat_data = scipy.io.loadmat(file_name=mat_file_path)

def convert(obj):
    """Recursively turn a loaded value into plain JSON-serialisable Python data.

    * ``bytes`` are decoded as UTF-8 text.
    * Lists and tuples become lists whose elements are converted in turn.
    * Any other object exposing ``tolist()`` is unpacked through that method
      and the result is converted again.
    * Everything else is returned unchanged.
    """
    if isinstance(obj, bytes):
        return obj.decode("utf-8")

    is_sequence = isinstance(obj, (list, tuple))
    if not is_sequence and hasattr(obj, "tolist"):
        # tolist() can itself produce nested containers or bytes, so re-enter.
        return convert(obj.tolist())

    if is_sequence:
        return list(map(convert, obj))
    return obj

# Keep only the entries whose key does not start with "__", converting each
# value into plain Python data so that json can serialise it.
clean_data = {}
for var_name, raw_value in mat_data.items():
    if var_name.startswith("__"):
        continue
    clean_data[var_name] = convert(raw_value)

with open(output_json_path, mode="w", encoding="utf-8") as f:
    json.dump(clean_data, fp=f, indent=2)

print(f"Saved to {output_json_path}")

#%%

import json
import os
import cv2
import numpy as np
import random

# Visualisation settings: images are stretched to resize_shape (width, height)
# and samples_per_split entries are drawn from each split.
samples_per_split = 30
resize_shape = (384, 384)

# Dataset layout: image paths stored in the JSON files are joined onto
# image_root when the images are opened.
image_root = "IIIT5K/IIIT5K"
char_jsons = dict(
    train="IIIT5K/IIIT5K/trainCharBound.json",
    test="IIIT5K/IIIT5K/testCharBound.json",
)

# Fixed seed so the same entries are sampled on every run.
random.seed(42)

# Root directory for everything this cell writes; created up front.
output_root = "char_bound_output"
os.makedirs(output_root, exist_ok=True)

def merge_char_boxes(char_boxes):
    """Return the smallest ``[x, y, w, h]`` box enclosing all character boxes.

    Each item of ``char_boxes`` must unpack into four numbers ``(x, y, w, h)``:
    a corner position plus a width and a height. The result uses the same
    layout and is returned as a list.

    Raises ``ValueError`` when ``char_boxes`` is empty (nothing to take the
    min/max of) or when an item does not hold exactly four values.
    """
    lefts, tops, rights, bottoms = [], [], [], []
    for bx, by, bw, bh in char_boxes:
        lefts.append(bx)
        tops.append(by)
        rights.append(bx + bw)
        bottoms.append(by + bh)

    left, top = min(lefts), min(tops)
    return [left, top, max(rights) - left, max(bottoms) - top]

# Visual sanity check: for a random sample of each split, draw the merged word
# box and the word on the image stretched to resize_shape, append a caption
# strip showing the box area, and save the result under
# <output_root>/<split>_samples.
caption_height = 30  # rows of black padding under the image for the area text
out_w, out_h = resize_shape  # cv2.resize takes its target size as (width, height)

for split_name, json_path in char_jsons.items():
    with open(json_path, "r") as f:
        data = json.load(f)

    # The top-level JSON key is named after the split.
    key = "trainCharBound" if "train" in split_name else "testCharBound"
    # The per-word entries sit one level down, in the first row.
    entries = data[key][0]

    print(f"{split_name} split: total available entries = {len(entries)}")
    selected_entries = random.sample(entries, min(samples_per_split, len(entries)))
    save_dir = os.path.join(output_root, f"{split_name}_samples")
    os.makedirs(save_dir, exist_ok=True)

    for idx, entry in enumerate(selected_entries):
        # Entry layout as indexed here: [[image_path], [word], char_boxes].
        image_path = entry[0][0]
        word = entry[1][0]
        char_boxes = entry[2]
        full_image_path = os.path.join(image_root, image_path)

        if not os.path.exists(full_image_path):
            print(f"Missing image: {full_image_path}")
            continue

        img = cv2.imread(full_image_path)
        if img is None:
            print(f"Failed to load: {full_image_path}")
            continue

        orig_h, orig_w = img.shape[:2]
        scale_w = out_w / orig_w
        scale_h = out_h / orig_h
        img_resized = cv2.resize(img, resize_shape)

        # Word box in original pixels, then mapped into the resized image.
        x, y, w, h = merge_char_boxes(char_boxes)
        x = int(x * scale_w)
        y = int(y * scale_h)
        w = int(w * scale_w)
        h = int(h * scale_h)
        area = w * h

        # Canvas = resized image on top, black caption strip below. Sizes are
        # derived from resize_shape so changing it keeps the canvas consistent.
        padded_img = np.zeros((out_h + caption_height, out_w, 3), dtype=np.uint8)
        padded_img[:out_h, :, :] = img_resized

        cv2.rectangle(padded_img, (x, y), (x + w, y + h), (0, 0, 255), 2)
        if word:
            # Clamp the label baseline to y >= 15 so text above a box near the
            # top edge is not pushed out of the image.
            cv2.putText(padded_img, word, (x, max(y - 5, 15)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
        cv2.putText(padded_img, f"Area: {area:.2f}", (10, out_h + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        save_name = f"{idx:02d}_{os.path.basename(image_path)}"
        out_path = os.path.join(save_dir, save_name)
        # imwrite reports failure through its return value; do not claim the
        # file was saved when it was not.
        if not cv2.imwrite(out_path, padded_img):
            print(f"Failed to write: {out_path}")
            continue
        print(f"Saved: {out_path} | word: '{word}' | area: {area:.2f}")

print(f"Done. {samples_per_split} samples saved per split with seed = 42.")

#%%
import os
import json
import cv2
import numpy as np
import random

# === Config ===
# Inputs: per-split annotation JSON files and the directory that the image
# paths inside them are joined onto.
image_root = "IIIT5K/IIIT5K"
char_jsons = dict([
    ("train", "IIIT5K/IIIT5K/trainCharBound.json"),
    ("test", "IIIT5K/IIIT5K/testCharBound.json"),
])

# Outputs: annotated images go to <output_root>/image, created up front.
output_root = "char_bound_output"
image_output_dir = os.path.join(output_root, "image")
os.makedirs(image_output_dir, exist_ok=True)

# Sampling: fixed seed, samples_per_split entries drawn from each split.
random.seed(42)
samples_per_split = 30

resize_shape = (384, 384)
pad_ratio = 0.05  # each side of the word box grows by 5% of its width/height

# === Merge character boxes to word box
def merge_char_boxes(char_boxes):
    """Merge per-character boxes into one enclosing word box.

    ``char_boxes`` is a sequence of ``(x, y, w, h)`` quadruples. The return
    value is the list ``[x_min, y_min, width, height]`` of the tightest box
    that contains all of them.

    Raises ``ValueError`` for an empty sequence or for an item that does not
    unpack into exactly four values.
    """
    # Convert every box to (near-x, near-y, far-x, far-y) edge coordinates.
    corners = [(x, y, x + w, y + h) for x, y, w, h in char_boxes]

    x_lo = min(c[0] for c in corners)
    y_lo = min(c[1] for c in corners)
    x_hi = max(c[2] for c in corners)
    y_hi = max(c[3] for c in corners)
    return [x_lo, y_lo, x_hi - x_lo, y_hi - y_lo]

# === Start processing
# For a random sample of each split: merge the character boxes into a word box,
# grow it by pad_ratio on every side (clamped to the image), draw it on a copy
# of the original image, save that image, and collect one metadata record per
# image that was actually written.
records = []
counter = 0  # number of images written so far; also makes file names unique

for split_name, json_path in char_jsons.items():
    with open(json_path, "r") as f:
        data = json.load(f)

    # The top-level JSON key is named after the split.
    key = "trainCharBound" if "train" in split_name else "testCharBound"
    # The per-word entries sit one level down, in the first row.
    entries = data[key][0]

    print(f"{split_name} split: total available entries = {len(entries)}")
    selected_entries = random.sample(entries, min(samples_per_split, len(entries)))

    for entry in selected_entries:
        # Entry layout as indexed here: [[image_path], [word], char_boxes].
        image_path = entry[0][0]
        word = entry[1][0]
        char_boxes = entry[2]
        full_image_path = os.path.join(image_root, image_path)

        if not os.path.exists(full_image_path):
            print(f"Missing image: {full_image_path}")
            continue

        img = cv2.imread(full_image_path)
        if img is None:
            print(f"Failed to load: {full_image_path}")
            continue

        orig_h, orig_w = img.shape[:2]

        # Merge character boxes
        x, y, w, h = merge_char_boxes(char_boxes)

        # Add padding, keeping every corner inside the image bounds
        pad_w = w * pad_ratio
        pad_h = h * pad_ratio
        x1 = max(int(round(x - pad_w)), 0)
        y1 = max(int(round(y - pad_h)), 0)
        x2 = min(int(round(x + w + pad_w)), orig_w - 1)
        y2 = min(int(round(y + h + pad_h)), orig_h - 1)
        # Area of the padded, clamped box in original-image pixels.
        area = (x2 - x1) * (y2 - y1)

        # Draw and save
        img_copy = img.copy()
        cv2.rectangle(img_copy, (x1, y1), (x2, y2), (0, 0, 255), 2)

        filename = f"{os.path.splitext(os.path.basename(image_path))[0]}_{counter}.jpg"
        out_path = os.path.join(image_output_dir, filename)
        # imwrite reports failure through its return value; skip the record so
        # the metadata never points at an image that is not on disk.
        if not cv2.imwrite(out_path, img_copy):
            print(f"Failed to write: {out_path}")
            continue

        records.append({
            "image_path": f"image/{filename}",
            "word": word,
            "area": round(area, 2)
        })

        print(f"Saved: {filename} | word: '{word}' | area: {area:.2f}")
        counter += 1

# === Save metadata
with open(os.path.join(output_root, "metadata.json"), "w") as f:
    json.dump(records, f, indent=2)

print(f"\n✅ Done. Total saved bbox images: {counter}")

#%%
import os
import json
import cv2
import numpy as np

# === Config ===
# Annotation JSON file for each split, in processing order.
char_jsons = {}
char_jsons["train"] = "IIIT5K/IIIT5K/trainCharBound.json"
char_jsons["test"] = "IIIT5K/IIIT5K/testCharBound.json"

# Image paths from the JSON files are joined onto image_root; annotated copies
# are written to <output_root>/image, which is created here.
image_root, output_root = "IIIT5K/IIIT5K", "char_bound_output"
image_output_dir = os.path.join(output_root, "image")
os.makedirs(image_output_dir, exist_ok=True)

# Reference (width, height) used only to express box areas at a common scale;
# images themselves are not resized.
resize_shape = (384, 384)

# Accumulated metadata records and the running count of processed word boxes.
records, counter = [], 0

# === Merge character boxes
def merge_char_boxes(char_boxes):
    """Compute the bounding box that covers every character box.

    ``char_boxes`` holds ``(x, y, w, h)`` quadruples. The result is the tuple
    ``(x_min, y_min, width, height)`` of the tightest enclosing box.

    Raises ``ValueError`` for an empty input or for an item that does not
    unpack into exactly four values.
    """
    left = min(bx for bx, _by, _bw, _bh in char_boxes)
    top = min(by for _bx, by, _bw, _bh in char_boxes)
    right = max(bx + bw for bx, _by, bw, _bh in char_boxes)
    bottom = max(by + bh for _bx, by, _bw, bh in char_boxes)
    return left, top, right - left, bottom - top

# === Process all data
# For every entry of every split: draw the merged word box on a copy of the
# original image, save it under image_output_dir, and record the word together
# with the box area expressed at resize_shape scale. A record is only kept when
# the image was actually written.
for split_name, json_path in char_jsons.items():
    with open(json_path, "r") as f:
        data = json.load(f)

    # The top-level JSON key is named after the split.
    key = "trainCharBound" if "train" in split_name else "testCharBound"
    # The per-word entries sit one level down, in the first row.
    entries = data[key][0]
    print(f"Processing {split_name} split: total entries = {len(entries)}")

    for entry in entries:
        # Entry layout as indexed here: [[image_path], [word], char_boxes].
        image_path = entry[0][0]
        word = entry[1][0]
        char_boxes = entry[2]
        full_image_path = os.path.join(image_root, image_path)

        if not os.path.exists(full_image_path):
            print(f"Missing image: {full_image_path}")
            continue

        img = cv2.imread(full_image_path)
        if img is None:
            print(f"Failed to load: {full_image_path}")
            continue

        # Factors that map original pixels onto the resize_shape reference size.
        orig_h, orig_w = img.shape[:2]
        scale_w = resize_shape[0] / orig_w
        scale_h = resize_shape[1] / orig_h

        # Merge char boxes into word bbox
        x, y, w, h = merge_char_boxes(char_boxes)

        # Area under (384, 384) scale
        scaled_area = (w * scale_w) * (h * scale_h)

        # Draw red bbox on original image (unscaled coordinates)
        x1 = int(round(x))
        y1 = int(round(y))
        x2 = int(round(x + w))
        y2 = int(round(y + h))

        img_copy = img.copy()
        cv2.rectangle(img_copy, (x1, y1), (x2, y2), (0, 0, 255), 2)

        filename = f"{os.path.splitext(os.path.basename(image_path))[0]}_{counter}.jpg"
        out_path = os.path.join(image_output_dir, filename)
        # imwrite reports failure through its return value; skip the record so
        # the metadata never points at an image that is not on disk.
        if not cv2.imwrite(out_path, img_copy):
            print(f"Failed to write: {out_path}")
            continue

        records.append({
            "image_path": f"image/{filename}",
            "word": word,
            "area": round(scaled_area, 2)
        })

        counter += 1
        if counter % 500 == 0:
            print(f"Processed {counter} word boxes...")

# === Save metadata
with open(os.path.join(output_root, "metadata.json"), "w") as f:
    json.dump(records, f, indent=2)

print(f"\n✅ Done. Total word boxes saved: {counter}")

#%%
import json
import os
import uuid
import shutil
import random

# Locations: the metadata file and annotated images to read from, and the root
# directory of the train/val dataset assembled here.
input_json_path = "char_bound_output/metadata.json"
source_image_root = "char_bound_output/image"
output_root = "IIIT"

train_img_dir = os.path.join(output_root, "train/images")
val_img_dir = os.path.join(output_root, "val/images")
for split_dir in (train_img_dir, val_img_dir):
    os.makedirs(split_dir, exist_ok=True)

with open(input_json_path, "r") as f:
    all_data = json.load(f)

# Seed immediately before the first random call so sampling and shuffling are
# repeatable for the same metadata file.
random.seed(42)

# Step 1: draw at most 2000 records without replacement
sample_size = min(2000, len(all_data))
sampled_data = random.sample(all_data, sample_size)

# Step 2: shuffle, then the first 80% become train and the rest val
random.shuffle(sampled_data)
split_index = int(0.8 * len(sampled_data))
train_data, val_data = sampled_data[:split_index], sampled_data[split_index:]

# Output accumulators: training records, validation questions, validation
# answers, and source image paths that could not be found.
train_json, val_json, val_ans_json, missing_images = [], [], [], []

# Question attached to every image, shared by the train and val records.
question = "<image>\nwhat is written in the red bounding box in the image?"

# Train processing: copy each image under a fresh UUID-based name and emit a
# two-turn conversation record (the question, then the word as the answer).
for item in train_data:
    img_path = os.path.join(source_image_root, os.path.basename(item["image_path"]))
    new_id = str(uuid.uuid4())
    new_img_name = f"{new_id}.jpg"
    dest_path = os.path.join(train_img_dir, new_img_name)

    if not os.path.exists(img_path):
        missing_images.append(img_path)
        continue

    shutil.copy(img_path, dest_path)
    human_turn = {"from": "human", "value": question}
    gpt_turn = {"from": "gpt", "value": item["word"]}
    train_json.append({
        "id": new_id,
        # NOTE(review): the recorded path starts with "IIIT5K/" while the file
        # is copied into train_img_dir — confirm the consumer resolves this
        # prefix as intended.
        "image": f"IIIT5K/train/images/{new_img_name}",
        "conversations": [human_turn, gpt_turn],
    })

# Val processing: same copy step, but the question and its reference answer go
# into two parallel lists linked by question_id.
for item in val_data:
    img_path = os.path.join(source_image_root, os.path.basename(item["image_path"]))
    new_id = str(uuid.uuid4())
    new_img_name = f"{new_id}.jpg"
    dest_path = os.path.join(val_img_dir, new_img_name)

    if not os.path.exists(img_path):
        missing_images.append(img_path)
        continue

    shutil.copy(img_path, dest_path)
    val_json.append({
        "question_id": new_id,
        "image": new_img_name,
        "category": "default",
        "text": question,
        "id": new_id,
    })
    val_ans_json.append({
        "question_id": new_id,
        "prompt": question,
        "text": item["word"],
        "answer_id": None,
        "model_id": None,
        "metadata": {},
    })

# Write the three dataset files below output_root, in the same order each run.
for rel_path, payload in (
    ("train/train.json", train_json),
    ("val/val.json", val_json),
    ("val/val_ans.json", val_ans_json),
):
    with open(os.path.join(output_root, rel_path), "w") as f:
        json.dump(payload, f, indent=2)

# The report of missing source images is only created when there is at least
# one; it is written to the current working directory, one path per line.
if len(missing_images) > 0:
    with open("missing_images.txt", "w") as f:
        f.writelines(path + "\n" for path in missing_images)
