import cv2
import numpy as np
from pycocotools import mask as mask_utils
import json
import os
import sys
sys.path.append(".")
from src.utils.videoreader_pyav import VideoReaderAV

def mask_to_rle(mask_path):
    """Encode a mask image file as a JSON-serializable COCO RLE dict.

    The image is read with OpenCV, converted to grayscale and thresholded:
    pixels brighter than 127 are foreground, everything else is background.

    Args:
        mask_path: Path to the mask image on disk.

    Returns:
        The RLE dict produced by ``pycocotools.mask.encode``, with its
        ``counts`` entry decoded from bytes to a UTF-8 string so the dict
        can be dumped with ``json``.

    Raises:
        OSError: If OpenCV cannot read an image from ``mask_path``.
    """
    mask = cv2.imread(mask_path)
    if mask is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise OSError(f"Could not read mask image: {mask_path}")
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    # pycocotools' encoder takes a uint8 Fortran-ordered buffer, so the
    # boolean threshold result has to be cast back to uint8 (values 0/1).
    binary_mask = (mask > 127).astype(np.uint8)
    rle = mask_utils.encode(np.asfortranarray(binary_mask))
    rle['counts'] = rle['counts'].decode('utf-8')
    return rle

def rle_to_mask(rle):
    """Decode a COCO RLE dict into a uint8 mask image.

    Args:
        rle: RLE object accepted by ``pycocotools.mask.decode``.

    Returns:
        The decoded mask multiplied by 255 and cast to ``np.uint8``
        (i.e. 0/255, given that ``decode`` yields a binary 0/1 mask).
    """
    decoded = mask_utils.decode(rle)
    return (decoded * 255).astype(np.uint8)

# Sample configuration: set `video_id` to the video you want to inspect.
# The video, annotation and output paths below are all derived from it and
# are relative to the current working directory.
video_id = ""
video_path = f"videos/{video_id}.mp4"
annotation_path = f"annotations/{video_id}.json"
output_dir = f"outputs/dataset_samples/{video_id}"
os.makedirs(output_dir, exist_ok=True)

# Pick which reference mask to export: the segment, the character within that
# segment, and the mask within that character's reference images.
segment_index, character_index, reference_index = 0, 0, 0
# Use a context manager so the annotation file is closed deterministically.
with open(annotation_path, "r", encoding="utf-8") as annotation_file:
    video_annotation = json.load(annotation_file)
story_script = video_annotation["segments"][segment_index]
reference_info = story_script["reference images"][character_index]["masks"][reference_index]
rle = reference_info["rle mask"]
frame_index = reference_info["frame index"]

# Extract the annotated frame from the video. The frame is treated as RGB and
# converted to BGR, the channel order OpenCV expects when writing images.
video_reader = VideoReaderAV(video_path)
frame = video_reader.get_batch([frame_index])[0]
frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

# Decode the RLE into a uint8 0/255 mask; keep that copy for saving and build
# a float (H, W, 1) weight map in [0, 1] that broadcasts over the channels.
mask_uint8 = rle_to_mask(rle)
mask = np.expand_dims(mask_uint8, axis=-1) / 255.0

# Composite the reference image: keep the frame where the mask is set and
# fill everything else with white.
reference = frame_bgr.astype(np.float32) * mask
white_background = np.ones_like(frame_bgr) * 255
reference = reference + white_background * (1 - mask)
reference = reference.astype(np.uint8)

# Save mask, frame and reference image. The mask is written from its uint8
# form rather than a float array, and cv2.imwrite's boolean result is checked
# because it reports failure by returning False instead of raising.
output_images = {
    "reference_mask.png": mask_uint8,
    "reference_frame.png": frame_bgr,
    "reference_image.png": reference,
}
for output_name, output_image in output_images.items():
    output_path = f"{output_dir}/{output_name}"
    if not cv2.imwrite(output_path, output_image):
        raise OSError(f"Failed to write image: {output_path}")


