#%%
import os
from PIL import Image, ImageDraw

# Preview cell: outline every annotated box of one sample image in red and
# save the result as "output.jpg" in the current working directory.
image_path = "datasets/PUCPR+_devkit/data/Images/0_Cloudy.jpg"
anno_path = "datasets/PUCPR+_devkit/data/Annotations/0_Cloudy.txt"

img = Image.open(image_path).convert("RGB")
draw = ImageDraw.Draw(img)

with open(anno_path, "r") as anno_file:
    for raw_line in anno_file:
        fields = raw_line.split()
        # Lines with fewer than five whitespace-separated fields are ignored.
        if len(fields) < 5:
            continue
        # Only the first four fields are used, as integer corners x1, y1, x2, y2;
        # any remaining fields are not read here.
        box = [int(value) for value in fields[:4]]
        draw.rectangle(box, outline="red", width=2)

img.save("output.jpg")


#%%
import json

def generate_filtered_summary_json(data, min_threshold=10, max_threshold=20, min_valid_counts=5, count_range=(1, 40)):
    """Summarise how many images would be kept per object and count label.

    Args:
        data: Mapping of object name -> mapping of count label -> info, where
            ``info["count"]`` is read as the number of available images for
            that label. No other key of ``info`` is used.
        min_threshold: A count label qualifies only if its ``count`` is at
            least this value.
        max_threshold: Upper bound on the number of images recorded for a
            single count label.
        min_valid_counts: An object is included only if at least this many of
            its count labels qualify.
        count_range: Inclusive ``(low, high)`` bounds on the integer value of
            the count label itself.

    Returns:
        A dict mapping each included object to ``{label: images_kept, ...,
        "total": sum}``, plus the top-level keys ``"_total_images"`` (sum of
        all object totals) and ``"_total_objects"`` (number of included
        objects).
    """
    low, high = count_range[0], count_range[1]
    result = {}
    grand_total = 0
    object_count = 0

    for obj, count_dict in data.items():
        obj_total = 0
        valid_count_items = {}

        for count_str, info in count_dict.items():
            # An aggregate "total" entry is not a count label.
            if count_str == "total":
                continue
            try:
                count_i = int(count_str)
                count = int(info["count"])
            except (ValueError, KeyError, TypeError):
                # Malformed entry (non-numeric label/count, missing "count",
                # or info that is not a mapping): skip it rather than abort.
                continue

            if low <= count_i <= high and count >= min_threshold:
                images_to_add = min(count, max_threshold)
                valid_count_items[count_str] = images_to_add
                obj_total += images_to_add

        if len(valid_count_items) >= min_valid_counts:
            valid_count_items["total"] = obj_total
            result[obj] = valid_count_items
            grand_total += obj_total
            object_count += 1

    result["_total_images"] = grand_total
    result["_total_objects"] = object_count
    return result

# Read the raw count summary, filter it with the default thresholds, and
# write the filtered summary next to it.
with open("car_count_summary_all.json", "r") as summary_in:
    data = json.loads(summary_in.read())

summary = generate_filtered_summary_json(data)

with open("summary_output_5.json", "w") as summary_out:
    summary_out.write(json.dumps(summary, indent=2))

#%%
import json
import os
import shutil
import random
import uuid

# Input summary file and the directories searched for source images.
json_path = "car_count_summary_all.json"
image_dirs = [
    "datasets/CARPK_devkit/data/Images",
    "datasets/PUCPR+_devkit/data/Images"
]

# Output layout: <root_dir>/train/images and <root_dir>/val/images.
root_dir = "car"
train_img_dir = os.path.join(root_dir, "train/images")
val_img_dir = os.path.join(root_dir, "val/images")
for split_img_dir in (train_img_dir, val_img_dir):
    os.makedirs(split_img_dir, exist_ok=True)

with open(json_path, "r") as summary_file:
    all_data = json.loads(summary_file.read())

def generate_filtered_summary_json(data, min_threshold=10, max_threshold=20, min_valid_counts=5, count_range=(1, 40)):
    """Select up to ``max_threshold`` image ids per object and count label.

    Args:
        data: Mapping of object name -> mapping of count label -> info, where
            ``info["count"]`` is read as the number of available images and
            ``info["image_ids"]`` as a sliceable sequence of image ids.
        min_threshold: A count label qualifies only if its ``count`` is at
            least this value.
        max_threshold: Maximum number of image ids kept per count label (the
            first ``max_threshold`` ids, in their stored order).
        min_valid_counts: An object is included only if at least this many of
            its count labels qualify.
        count_range: Inclusive ``(low, high)`` bounds on the integer value of
            the count label itself.

    Returns:
        A dict mapping each included object to ``{label: [image ids], ...,
        "total": number_of_ids_kept}``, plus the top-level keys
        ``"_total_images"`` and ``"_total_objects"``.
    """
    low, high = count_range[0], count_range[1]
    result = {}
    grand_total = 0
    object_count = 0

    for obj, count_dict in data.items():
        obj_total = 0
        valid_count_items = {}

        for count_str, info in count_dict.items():
            # An aggregate "total" entry is not a count label.
            if count_str == "total":
                continue
            try:
                count_i = int(count_str)
                count = int(info["count"])
                selected = info["image_ids"][:max_threshold]
            except (ValueError, KeyError, TypeError):
                # Malformed entry (non-numeric label/count, missing keys, or
                # unsliceable image_ids): skip it rather than abort the run.
                continue

            # The threshold is checked against the stored "count"; the number
            # of ids actually kept comes from the length of the id list.
            if low <= count_i <= high and count >= min_threshold:
                valid_count_items[count_str] = selected
                obj_total += len(selected)

        if len(valid_count_items) >= min_valid_counts:
            valid_count_items["total"] = obj_total
            result[obj] = valid_count_items
            grand_total += obj_total
            object_count += 1

    result["_total_images"] = grand_total
    result["_total_objects"] = object_count
    return result

# Filter the loaded summary with the default thresholds.
filtered_data = generate_filtered_summary_json(all_data)

# Accumulators filled by the split loop: train records, validation questions,
# validation answers, and ids of images that could not be found.
train_json = []
val_json = []
val_ans_json = []
missing_images = []

def find_and_copy_image(img_id, dest_folder, new_name=None, search_dirs=None, extensions=(".jpg", ".png")):
    """Locate an image by id and copy it into ``dest_folder``.

    The search tries each extension in order and, for each extension, each
    directory in order; the first existing ``<img_id><ext>`` wins.

    Args:
        img_id: Image file name without extension.
        dest_folder: Directory the image is copied into (must already exist).
        new_name: Optional destination file name. It is used verbatim: its
            extension is NOT adjusted to match the source file, so a ``.png``
            source copied under a ``.jpg`` name keeps its PNG content.
        search_dirs: Directories to search. Defaults to the module-level
            ``image_dirs`` list.
        extensions: File extensions to try, in priority order.

    Returns:
        The destination file name on success, or ``None`` (after printing a
        "[Missing]" message) if no candidate file exists.
    """
    if search_dirs is None:
        search_dirs = image_dirs

    for ext in extensions:
        fname = f"{img_id}{ext}"
        for image_dir in search_dirs:
            src_path = os.path.join(image_dir, fname)
            if os.path.exists(src_path):
                dst_name = new_name or fname
                dst_path = os.path.join(dest_folder, dst_name)
                # An existing destination file is left untouched, so repeated
                # requests for the same name do not re-copy.
                if not os.path.exists(dst_path):
                    shutil.copy2(src_path, dst_path)
                return dst_name

    print(f"[Missing] {img_id} (tried {' and '.join(extensions)}) not found.")
    return None

# Seed the RNG once so the shuffles below give the same split on every run.
random.seed(42)

for obj, count_dict in filtered_data.items():
    # Keys starting with "_" are the summary counters, not object names.
    if obj.startswith("_"):
        continue

    # The same prompt text is used for every image of this object.
    question = f"<image>\nhow many {obj} are there in the image?"

    for count_str, image_ids in count_dict.items():
        if count_str == "total":
            continue

        count_val = int(count_str)
        answer = str(count_val)

        # Shuffle a copy and send the first 80% to train, the rest to val.
        sampled = image_ids.copy()
        random.shuffle(sampled)
        split_idx = int(len(sampled) * 0.8)

        # === Train set: each copy is stored under a fresh "<uuid>.jpg" name.
        for img_id in sampled[:split_idx]:
            uid = str(uuid.uuid4())
            copied_name = find_and_copy_image(img_id, train_img_dir, f"{uid}.jpg")
            if not copied_name:
                missing_images.append(img_id)
                continue
            conversations = [
                {"from": "human", "value": question},
                {"from": "gpt", "value": answer},
            ]
            train_json.append({
                "id": uid,
                "image": f"car/train/images/{copied_name}",
                "conversations": conversations,
            })

        # === Validation set: images keep their original file names.
        for img_id in sampled[split_idx:]:
            copied_name = find_and_copy_image(img_id, val_img_dir)
            if not copied_name:
                missing_images.append(img_id)
                continue
            qid = str(uuid.uuid4())
            # NOTE(review): this path has no "car/" prefix, unlike the train
            # records above - confirm that is what the consumer expects.
            val_json.append({
                "question_id": qid,
                "image": f"val/images/{copied_name}",
                "category": "default",
                "text": question,
                "id": qid,
            })
            val_ans_json.append({
                "question_id": qid,
                "prompt": question,
                "text": answer,
                "answer_id": None,
                "model_id": None,
                "metadata": {},
            })

# === Save outputs
for split_name in ("train", "val"):
    os.makedirs(os.path.join(root_dir, split_name), exist_ok=True)

# Each JSON file is written with the same 2-space indentation, in this order.
json_outputs = (
    ("train/train.json", train_json),
    ("val/val.json", val_json),
    ("val/val_ans.json", val_ans_json),
)
for rel_path, payload in json_outputs:
    with open(os.path.join(root_dir, rel_path), "w") as out_file:
        json.dump(payload, out_file, indent=2)

# One missing image id per line.
with open(os.path.join(root_dir, "missing_images.txt"), "w") as out_file:
    out_file.writelines(mid + "\n" for mid in missing_images)

print(f"✅ Total train images: {len(train_json)}")
print(f"✅ Total val images: {len(val_json)}")
print(f"🚫 Missing images: {len(missing_images)} (see missing_images.txt)")
