import os
import json
import random
import shutil
import shortuuid
from collections import defaultdict

# === Configuration ===
# Seed the stdlib RNG before anything else runs.
# NOTE(review): no call into `random` is visible in this part of the file;
# confirm whether the seed is still needed.
random.seed(42)

# The nine orientation labels, in the order they are numbered (1-9) when the
# prompt is built further down.
OPTIONS = [
    "front", "front right", "right", "back right",
    "back", "back left", "left", "front left",
    "top",
]

# Inputs: the train split's annotation file and the directory of its images.
TRAIN_JSON = "/fs/scratch/PAS2099/Jiacheng/Orientation/train_v2/train.json"
SRC_TRAIN_IMG_DIR = "/fs/scratch/PAS2099/Jiacheng/Orientation/train_v2/image"

# Outputs: renamed image copies and the finetune-format JSON describing them.
DEST_IMG_DIR = "/fs/scratch/PAS2099/Jiacheng/Orientation/train_v2/finetune_images"
DEST_JSON = "/fs/scratch/PAS2099/Jiacheng/Orientation/train_v2/finetune_data.json"

# Create the image output directory up front; a pre-existing one is fine.
os.makedirs(DEST_IMG_DIR, exist_ok=True)

# === Load your train entries ===
# Decode explicitly as UTF-8 so parsing does not depend on the machine's
# locale (open() otherwise falls back to the locale's preferred encoding).
with open(TRAIN_JSON, "r", encoding="utf-8") as f:
    train_entries = json.load(f)

# Accumulates one conversation-style record per successfully copied image.
finetune_data = []

# The numbered option list ("1. front, 2. front right, ...") is the same for
# every sample, so build it once instead of on every iteration.
opts = ", ".join(f"{i+1}. {o}" for i, o in enumerate(OPTIONS))

for entry in train_entries:
    # Only the basename of entry["image"] is used; the file is looked up
    # directly under SRC_TRAIN_IMG_DIR.
    orig_fn = os.path.basename(entry["image"])
    _, ext = os.path.splitext(orig_fn)
    src_path = os.path.join(SRC_TRAIN_IMG_DIR, orig_fn)

    # Skip entries whose image is not a regular file. isfile() is used rather
    # than exists() because an empty basename makes src_path the source
    # directory itself, which exists() accepts and shutil.copy() then fails on.
    if not os.path.isfile(src_path):
        print(f"Missing image: {src_path}")
        continue

    # Read the remaining required fields before copying, so a malformed entry
    # raises KeyError before an image is copied for it.
    obj = entry["category_name"]
    # model answer is the label
    # NOTE(review): presumably one of the OPTIONS strings -- verify against
    # the contents of train.json.
    gpt_a = entry["label"]

    # Generate the unique id only for entries that are actually kept; it names
    # both the copied image and the record.
    uid = shortuuid.uuid()
    new_fn = uid + ext
    dst_path = os.path.join(DEST_IMG_DIR, new_fn)
    shutil.copy(src_path, dst_path)

    # build the human question
    human_q = (
        f"<image>\n"
        f"What is the orientation of the {obj} in the image? Choose one from below. {opts}"
    )

    finetune_data.append({
        "id": uid,
        # Stored as a relative path under "finetune_images".
        "image": os.path.join("finetune_images", new_fn),
        "conversations": [
            {"from": "human", "value": human_q},
            {"from": "gpt", "value": gpt_a},
        ],
    })

# === Save finetune JSON ===
# ensure_ascii=False writes non-ASCII characters verbatim, so the file handle
# must be UTF-8; with the locale default this could raise UnicodeEncodeError
# or produce JSON that is not valid UTF-8.
with open(DEST_JSON, "w", encoding="utf-8") as f:
    json.dump(finetune_data, f, indent=2, ensure_ascii=False)

# Summary of what was produced and where.
print(f"✅ Generated {len(finetune_data)} samples for finetuning.")
print(f"Images copied ↦ {DEST_IMG_DIR}")
print(f"Finetune JSON saved ↦ {DEST_JSON}")
