import os
import sys
import json
from pathlib import Path


def collect_captions(annotations, image_names):
    """Pair captions with image file names for the leading annotations.

    Only the first ``len(image_names)`` annotation records are processed,
    so the result never holds more entries than there are images in the
    directory listing.

    Args:
        annotations: List of records, each with an ``"image_id"`` and a
            ``"caption"`` key.
        image_names: File names found in the image directory.

    Returns:
        A ``(prompts, file_names)`` tuple of two equally long lists.
        Newlines inside a caption are replaced by ``". "`` so every prompt
        occupies exactly one line when written out.

    Raises:
        ValueError: If the file name derived from an annotation's
            ``image_id`` is not among ``image_names``.
    """
    # Build the lookup once: set membership is O(1) versus O(n) for a list.
    available = set(image_names)

    prompts = []
    file_names = []
    # Slicing (rather than indexing by position) also copes with there being
    # fewer annotations than images.
    for annotation in annotations[:len(image_names)]:
        image_id = annotation["image_id"]
        # File names are the image id left-padded with zeros to 12 characters.
        file_name = f"{image_id:0>12}.jpg"

        if file_name not in available:
            raise ValueError("idx image not in dir")

        file_names.append(file_name)
        prompts.append(annotation["caption"].replace("\n", ". "))

    return prompts, file_names


def main(argv):
    """Export prompts and matching image file names to text files.

    Args:
        argv: Command-line vector. ``argv[1]`` is the annotations JSON
            (example: ``annotations/captions_val2017.json``) and ``argv[2]``
            is the image directory (example: ``val2017``).

    Writes ``li_prompts_val.txt`` and ``li_idx_img_val.txt`` to the current
    working directory, one entry per line, with line *k* of one file
    corresponding to line *k* of the other.
    """
    if len(argv) < 3:
        raise SystemExit(f"usage: {argv[0] if argv else 'script'} ANNOTATIONS_JSON IMAGES_DIR")

    path_annotations = argv[1]
    path_images = argv[2]

    # Load the directory listing and the annotation records.
    li_images = os.listdir(path_images)

    with open(path_annotations, "r", encoding="utf-8") as f:
        annotations = json.load(f)["annotations"]

    li_prompts, li_img_idx = collect_captions(annotations, li_images)

    # Save the prompts and the image file names as parallel line-based files.
    with open("li_prompts_val.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(li_prompts))

    with open("li_idx_img_val.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(li_img_idx))


if __name__ == "__main__":
    main(sys.argv)
