#%%
import os
import numpy as np
from PIL import Image
from tqdm import tqdm

# Per-channel (R, G, B) normalization means, one row per image processor.
# Only three distinct triples occur, so each is written once and the table is
# assembled from them in the original row order.
# NOTE(review): the values look like the commonly published CLIP, ImageNet
# and 0.5 means, but this file does not say which processor each row is.
_MEAN_A = (0.48145466, 0.4578275, 0.40821073)
_MEAN_B = (0.485, 0.456, 0.406)
_MEAN_C = (0.5, 0.5, 0.5)

processor_means = [
    list(triple)
    for triple in (
        _MEAN_A, _MEAN_A, _MEAN_B, _MEAN_B, _MEAN_C,
        _MEAN_A, _MEAN_B, _MEAN_C, _MEAN_C,
    )
]

# Average the rows channel-wise, then scale to the 0-255 range. The scaled
# values are truncated (not rounded) to integers; with the table above this
# evaluates to (124, 120, 111).
mean_array = np.asarray(processor_means)
average_mean = mean_array.mean(axis=0)
background_color = tuple((average_mean * 255).astype(int).tolist())

def remove_padding_precise(img: Image.Image, bg_color=(123, 116, 104), tol=3) -> Image.Image:
    """Crop away uniformly colored padding around an image.

    A pixel counts as content when at least one of its RGB channels differs
    from ``bg_color`` by more than ``tol``. The image is cropped to the
    tightest axis-aligned box that contains every content pixel.

    Args:
        img: Image to crop. Images that are not in RGB mode are converted to
            RGB for the background comparison only; the crop itself is taken
            from ``img``, so the result keeps the original mode.
        bg_color: Three-element (R, G, B) padding color on the 0-255 scale.
        tol: Maximum absolute per-channel difference that is still treated
            as background.

    Returns:
        The cropped image, or ``img`` itself (uncropped) when no pixel
        differs from the background by more than ``tol``.
    """
    # Compare in RGB so RGBA / grayscale / palette inputs do not fail when
    # broadcast against the three-channel reference color.
    rgb = img if img.mode == "RGB" else img.convert("RGB")
    pixels = np.asarray(rgb)

    # A float reference forces the subtraction into float64, so it cannot
    # wrap around even when bg_color arrives as an unsigned NumPy array.
    reference = np.asarray(bg_color, dtype=np.float64).reshape(1, 1, 3)
    content = (np.abs(pixels - reference) > tol).any(axis=2)

    if not content.any():
        return img

    # Project the mask onto each axis; the first and last hits are the box
    # edges. The +1 makes right/bottom exclusive, as Image.crop expects.
    cols = np.flatnonzero(content.any(axis=0))
    rows = np.flatnonzero(content.any(axis=1))
    left, right = int(cols[0]), int(cols[-1]) + 1
    top, bottom = int(rows[0]), int(rows[-1]) + 1
    return img.crop((left, top, right, bottom))


def process_folder(input_dir, output_dir, extensions=('.jpg', '.png')):
    """Strip background padding from every image in a folder.

    Each matching file in ``input_dir`` is opened, converted to RGB, cropped
    with ``remove_padding_precise`` using the module-level
    ``background_color``, and saved under the same file name in
    ``output_dir``. A failure on one file is printed and does not stop the
    remaining files from being processed.

    Args:
        input_dir: Directory containing the source images (not recursed).
        output_dir: Destination directory; created if it does not exist.
        extensions: File-name suffix, or iterable of suffixes, selecting the
            files to process. Matching is case-insensitive.
    """
    os.makedirs(output_dir, exist_ok=True)

    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    image_files = sorted(
        f for f in os.listdir(input_dir)
        if f.lower().endswith(suffixes)
    )

    for fname in tqdm(image_files, desc=f"Processing {input_dir}"):
        in_path = os.path.join(input_dir, fname)
        out_path = os.path.join(output_dir, fname)

        try:
            # convert() returns a fully loaded copy, so the source file can
            # be closed as soon as the conversion is done.
            with Image.open(in_path) as src:
                img = src.convert("RGB")
            cropped = remove_padding_precise(img, background_color)
            cropped.save(out_path)
        except Exception as e:
            # Deliberate best-effort: report the file and carry on.
            print(f"[ERROR] Failed on {fname}: {e}")

# Crop the padding off every image in the training folder and write the
# results to a separate output tree.
input_folder, output_folder = "combined/train/images", "new/train/images"
process_folder(input_dir=input_folder, output_dir=output_folder)

#%%
import json
import random
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import cv2
import os

# Visual spot check of the validation split: pick one random record, draw its
# bounding box on the image, then print the question and the matching answer.
json_path = "recognition/val/val.json"
image_root = "recognition/val/images"

# Explicit UTF-8 so decoding does not depend on the platform default.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

sample = random.choice(data)

img_name = sample["image"]
img_path = os.path.join(image_root, img_name)
bbox = sample["bbox"]
question = sample["text"]

# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread(img_path)
if image is None:
    raise FileNotFoundError(f"Image not found: {img_path}")

# OpenCV loads BGR; matplotlib expects RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
h, w = image.shape[:2]
# The box is scaled by the image size, so it is presumably stored as
# normalized [x1, y1, x2, y2] fractions — TODO confirm against the dataset.
x1, y1 = int(bbox[0] * w), int(bbox[1] * h)
x2, y2 = int(bbox[2] * w), int(bbox[3] * h)

fig, ax = plt.subplots(1, figsize=(10, 6))
ax.imshow(image)
rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none')
ax.add_patch(rect)
ax.axis("off")
plt.show()
print(question)

# Answers live in a separate file and are joined to the sample by question_id.
val_ans_path = "recognition/val/val_ans.json"
with open(val_ans_path, "r", encoding="utf-8") as f:
    val_ans_data = json.load(f)

question_id = sample["question_id"]
# First answer whose id matches; None when the answer file has no such id.
answer = next((item["text"] for item in val_ans_data if item["question_id"] == question_id), None)

print("✅ Answer:\n", answer)

#%%
import os
import json
import random
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Visual spot check of the training split: pick one random record, draw its
# bounding box on the image, and show the question/answer pair.
# Explicit UTF-8 so decoding does not depend on the platform default.
with open("recognition/train/train.json", "r", encoding="utf-8") as f:
    data = json.load(f)

sample = random.choice(data)

# The stored path is used as-is, so it must be resolvable from the current
# working directory.
img_path = sample["image"]
bbox = sample["bbox"]
convs = sample["conversations"]
# First "gpt" turn is taken as the answer, first "human" turn as the question.
# next() without a default raises StopIteration if a role is missing.
label = next(c["value"] for c in convs if c["from"] == "gpt")
question = next(c["value"] for c in convs if c["from"] == "human")

# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread(img_path)
if image is None:
    raise FileNotFoundError(f"Image not found: {img_path}")

# OpenCV loads BGR; matplotlib expects RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
h, w = image.shape[:2]
# The box is scaled by the image size, so it is presumably stored as
# normalized [x1, y1, x2, y2] fractions — TODO confirm against the dataset.
x1, y1 = int(bbox[0] * w), int(bbox[1] * h)
x2, y2 = int(bbox[2] * w), int(bbox[3] * h)

fig, ax = plt.subplots(1, figsize=(10, 6))
ax.imshow(image)
rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none')
ax.add_patch(rect)
ax.axis("off")
ax.set_title(f"Answer: {label}")
plt.show()

print("Question:")
print(question)
print("\nAnswer:")
print(label)
