#%%
import os
import json
import uuid
import random
import shutil

# Seed Python's `random` module, which performs the shuffle further down.
# Note: this does not affect uuid.uuid4(), so generated ids differ per run.
random.seed(42)

# === Paths ===
# Inputs: per-dataset annotation JSONs and the folders holding the images.
LVIS_JSON_PATH = "bbox_and_variance_lvis.json"
O365_JSON_PATH = "bbox_and_variance_object365.json"
LVIS_IMG_DIR = "boxed_images_LVIS"
O365_IMG_DIR = "boxed_images_object365"
# Outputs: image folders of the train / val splits (created if missing).
OUT_TRAIN_DIR = "color/train/images"
OUT_VAL_DIR = "color/val/images"
os.makedirs(OUT_TRAIN_DIR, exist_ok=True)
os.makedirs(OUT_VAL_DIR, exist_ok=True)

# === Load JSONs ===
# JSON text is UTF-8; pass the encoding explicitly instead of relying on the
# platform's locale default.
with open(LVIS_JSON_PATH, "r", encoding="utf-8") as f:
    lvis_data = json.load(f)
with open(O365_JSON_PATH, "r", encoding="utf-8") as f:
    obj365_data = json.load(f)

# === Collect all records ===
all_items = []


def collect_items(json_data, category, image_dir):
    """Append one record per annotated image to the module-level ``all_items``.

    ``json_data`` maps an image file name to its list of box dicts. Images
    whose list is empty are skipped; for the others only the first box is
    used and its ``"mode_rgb"`` value is stored as the record's colour.

    Args:
        json_data: Mapping of file name -> list of box dicts.
        category: Dataset label stored under ``"source"``.
        image_dir: Folder the file name is relative to, stored under
            ``"img_dir"``.
    """
    all_items.extend(
        {
            "source": category,
            "orig_fname": image_name,
            "rgb": boxes[0]["mode_rgb"],
            "img_dir": image_dir,
        }
        for image_name, boxes in json_data.items()
        if boxes
    )

collect_items(lvis_data, "LVIS", LVIS_IMG_DIR)
collect_items(obj365_data, "object365", O365_IMG_DIR)

# === Shuffle and split ===
# First 80% of the shuffled records become the train split, the rest val.
random.shuffle(all_items)
split_idx = int(len(all_items) * 0.8)
train_items, val_items = all_items[:split_idx], all_items[split_idx:]

# The same question is asked for every sample.
QUESTION = "<image>\nWhat color is shown within the red bounding box?"


def _copy_under_new_id(record, out_dir):
    """Copy the record's image into ``out_dir`` as ``<uuid4>.jpg``; return the uuid string."""
    new_id = str(uuid.uuid4())
    shutil.copy2(
        os.path.join(record["img_dir"], record["orig_fname"]),
        os.path.join(out_dir, f"{new_id}.jpg"),
    )
    return new_id


# === Generate TRAIN JSON ===
# One conversation-style record per training image.
train_json = []
for record in train_items:
    sample_id = _copy_under_new_id(record, OUT_TRAIN_DIR)
    question_turn = {"from": "human", "value": QUESTION}
    answer_turn = {"from": "gpt", "value": str(record["rgb"])}
    train_json.append({
        "id": sample_id,
        "category": record["source"],
        "image": f"color/train/images/{sample_id}.jpg",
        "conversations": [question_turn, answer_turn],
    })

# === Generate VAL JSON + VAL_ANS JSON ===
# Questions and reference answers go to two parallel lists joined by question_id.
val_json, val_ans_json = [], []
for record in val_items:
    qid = _copy_under_new_id(record, OUT_VAL_DIR)
    val_json.append(dict(
        question_id=qid,
        image=f"{qid}.jpg",
        category=record["source"],
        text=QUESTION,
        id=qid,
    ))
    val_ans_json.append(dict(
        question_id=qid,
        prompt=QUESTION,
        text=str(record["rgb"]),
        answer_id=None,
        model_id=None,
        metadata={},
    ))

# === Save JSONs ===
for out_path, payload in (
    ("color/train/train.json", train_json),
    ("color/val/val.json", val_json),
    ("color/val/val_ans.json", val_ans_json),
):
    with open(out_path, "w") as f:
        json.dump(payload, f, indent=2)

print(f"✅ Saved {len(train_json)} train and {len(val_json)} val samples.")


#%%
import json
import random
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os

# === Paths ===
json_path = "color/train/train.json"
base_image_dir = "."

with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Spot-check a single randomly chosen training record.
item = random.choice(data)

img_rel_path = item["image"]
img_path = os.path.join(base_image_dir, img_rel_path)
rgb_str = item["conversations"][1]["value"]
# The answer is stored as the text "[R, G, B]". Parse it as data with
# json.loads instead of executing it with eval(); anything that is not a
# plain JSON list of numbers now fails loudly here.
rgb = json.loads(rgb_str)

# === Load the image ===
image = cv2.imread(img_path)
# cv2.imread signals failure by returning None; raise explicitly because an
# assert would be stripped when Python runs with -O.
if image is None:
    raise FileNotFoundError(f"Image not found: {img_path}")
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR, matplotlib shows RGB

# === Build the colour patch ===
color_patch = np.ones((100, 100, 3), dtype=np.uint8)
color_patch[:, :] = rgb  # value is already RGB, which is what matplotlib displays

print("RGB value from JSON:", rgb)
print("Pixel value in patch:", color_patch[0, 0].tolist())  # RGB

# === Show the image and the patch side by side ===
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.imshow(image_rgb)
plt.title(f"Image: {os.path.basename(img_path)}")
plt.axis("off")

plt.subplot(1, 2, 2)
plt.imshow(color_patch)
plt.title(f"RGB: {rgb}")
plt.axis("off")

plt.tight_layout()
plt.show()

# %%
import os
import json
import uuid
import random
import shutil
import cv2
import numpy as np
from collections import Counter

# Seed Python's `random` module, which performs the shuffle further down.
# Note: this does not affect uuid.uuid4(), so generated ids differ per run.
random.seed(42)

# === Paths ===
# Inputs: per-dataset annotation JSONs and the folders holding the images.
LVIS_JSON_PATH = "bbox_and_variance_lvis.json"
O365_JSON_PATH = "bbox_and_variance_object365.json"
LVIS_IMG_DIR = "boxed_images_LVIS"
O365_IMG_DIR = "boxed_images_object365"
# Outputs: image folders of the train / val splits (created if missing).
OUT_TRAIN_DIR = "color/train/images"
OUT_VAL_DIR = "color/val/images"
os.makedirs(OUT_TRAIN_DIR, exist_ok=True)
os.makedirs(OUT_VAL_DIR, exist_ok=True)

# === Load JSONs ===
# JSON text is UTF-8; pass the encoding explicitly instead of relying on the
# platform's locale default.
with open(LVIS_JSON_PATH, "r", encoding="utf-8") as f:
    lvis_data = json.load(f)
with open(O365_JSON_PATH, "r", encoding="utf-8") as f:
    obj365_data = json.load(f)

# === Collect all records ===
all_items = []


def collect_items(json_data, category, image_dir):
    """Append one record per annotated image to the module-level ``all_items``.

    ``json_data`` maps an image file name to its list of box dicts. Only
    images with a non-empty list produce a record, and only the first box's
    ``"bbox"`` value is kept.

    Args:
        json_data: Mapping of file name -> list of box dicts.
        category: Dataset label stored under ``"source"``.
        image_dir: Folder the file name is relative to, stored under
            ``"img_dir"``.
    """
    for image_name, boxes in json_data.items():
        if boxes:
            first_box = boxes[0]
            all_items.append(
                dict(
                    source=category,
                    orig_fname=image_name,
                    bbox=first_box["bbox"],
                    img_dir=image_dir,
                )
            )

# Gather the records of both datasets, LVIS first, into all_items.
for _data, _source, _img_dir in (
    (lvis_data, "LVIS", LVIS_IMG_DIR),
    (obj365_data, "object365", O365_IMG_DIR),
):
    collect_items(_data, _source, _img_dir)

# === Helper: extract most common RGB from bbox ===
def get_most_common_rgb(image_path, bbox):
    """Return the most frequent pixel colour inside ``bbox`` as ``[R, G, B]``.

    Args:
        image_path: Path of the image, read with ``cv2.imread``. An
            already-loaded BGR array of shape (H, W, 3) is also accepted and
            used as-is.
        bbox: ``(x1, y1, x2, y2)`` pixel corners, both corners inclusive.
            Values are truncated to ``int``; coordinates below 0 are clamped
            to 0.

    Returns:
        A list of three plain Python ints in RGB order, or ``None`` when the
        image cannot be read or the box selects no pixels.
    """
    if isinstance(image_path, np.ndarray):
        img = image_path
    else:
        img = cv2.imread(image_path)  # returns None on failure, does not raise
    if img is None:
        return None

    # Annotation files often store coordinates as floats, which NumPy refuses
    # as slice indices.
    x1, y1, x2, y2 = (int(v) for v in bbox)
    # Negative indices would wrap around to the opposite edge when slicing,
    # silently selecting the wrong pixels; clamp them to the image origin.
    x_start, y_start = max(x1, 0), max(y1, 0)
    x_stop, y_stop = max(x2 + 1, 0), max(y2 + 1, 0)
    region = img[y_start:y_stop, x_start:x_stop]  # BGR
    if region.size == 0:
        return None

    # .tolist() converts to plain Python ints in one C-level pass. That keeps
    # str() of the result as "[R, G, B]" (NumPy scalars would render as
    # "np.uint8(...)" on NumPy >= 2) and avoids a slow tuple() per NumPy row.
    counts = Counter(map(tuple, region.reshape(-1, 3).tolist()))
    most_common_bgr = counts.most_common(1)[0][0]
    return list(most_common_bgr[::-1])  # BGR → RGB

# === Shuffle and split ===
# First 80% of the shuffled records become the train split, the rest val.
random.shuffle(all_items)
split_idx = int(len(all_items) * 0.8)
train_items = all_items[:split_idx]
val_items = all_items[split_idx:]

# Question text shared by every train / val record.
PROMPT = "<image>\nWhat color is shown within the red bounding box?"
# Items dropped because get_most_common_rgb returned None (image could not be
# read, or the box selected no pixels).
skipped = 0

# === Generate TRAIN JSON ===
train_json = []
for item in train_items:
    src_path = os.path.join(item["img_dir"], item["orig_fname"])
    # Compute the label BEFORE copying: a skipped item must not leave an
    # image in the output folder that no JSON record refers to.
    rgb = get_most_common_rgb(src_path, item["bbox"])
    if rgb is None:
        skipped += 1
        continue
    # Plain ints so the answer text is always "[R, G, B]", even if the helper
    # hands back NumPy scalars.
    rgb = [int(c) for c in rgb]
    img_id = str(uuid.uuid4())
    dst_path = os.path.join(OUT_TRAIN_DIR, f"{img_id}.jpg")
    shutil.copy2(src_path, dst_path)
    train_json.append({
        "id": img_id,
        "category": item["source"],
        "image": f"color/train/images/{img_id}.jpg",
        "conversations": [
            {"from": "human", "value": PROMPT},
            {"from": "gpt", "value": str(rgb)}
        ]
    })

# === Generate VAL JSON + VAL_ANS JSON ===
# Questions and reference answers go to two parallel lists joined by question_id.
val_json, val_ans_json = [], []
for item in val_items:
    src_path = os.path.join(item["img_dir"], item["orig_fname"])
    # Same ordering as for train: label first, copy only when it is usable.
    rgb = get_most_common_rgb(src_path, item["bbox"])
    if rgb is None:
        skipped += 1
        continue
    rgb = [int(c) for c in rgb]
    qid = str(uuid.uuid4())
    dst_path = os.path.join(OUT_VAL_DIR, f"{qid}.jpg")
    shutil.copy2(src_path, dst_path)
    val_json.append({
        "question_id": qid,
        "image": f"{qid}.jpg",
        "category": item["source"],
        "text": PROMPT,
        "id": qid
    })
    val_ans_json.append({
        "question_id": qid,
        "prompt": PROMPT,
        "text": str(rgb),
        "answer_id": None,
        "model_id": None,
        "metadata": {}
    })

# === Save JSONs ===
with open("color/train/train.json", "w", encoding="utf-8") as f:
    json.dump(train_json, f, indent=2)
with open("color/val/val.json", "w", encoding="utf-8") as f:
    json.dump(val_json, f, indent=2)
with open("color/val/val_ans.json", "w", encoding="utf-8") as f:
    json.dump(val_ans_json, f, indent=2)

print(f"✅ Saved {len(train_json)} train and {len(val_json)} val samples.")
if skipped:
    print(f"Skipped {skipped} items whose image could not be read or whose box was empty.")

# %%

# %%
