from retrievers import NUSWideRetriever, WebFlickrRetriever
import json
import os.path as osp

# Directory containing this script; the data paths below are resolved relative to it.
DIR = osp.dirname(__file__)

# Integer IDs read from the `valid_ids` file, one integer per line. They are
# handed to every process_annotation call further down.
# Blank / whitespace-only lines are skipped: int() raises ValueError on them,
# so a stray empty line would otherwise abort the whole script.
with open(osp.join(DIR, "../../data/Flickr_v2/raw/valid_ids")) as ids_file:
    valid_ids = [int(line) for line in ids_file if line.strip()]

# Collect the second element returned by each process_annotation call, in the
# order the sources are processed. The first element is not used by this script.
collected_tags = []

# The image list is handled by NUSWideRetriever.
_, nus_tags = NUSWideRetriever.process_annotation(
    osp.abspath(osp.join(DIR, "../../data/WebFlickr/ImageList/Imagelist.txt")),
    valid_ids,
)
collected_tags.append(nus_tags)

# The three XML files are all handled by WebFlickrRetriever.
xml_sources = (
    "../../data/WebFlickr/flickrXml/photosCLEF.xml",
    "../../data/WebFlickr/flickrXml/photosMIR.xml",
    "../../data/WebFlickr/flickrXml/photosPASCAL.xml",
)
for xml_rel_path in xml_sources:
    _, xml_tags = WebFlickrRetriever.process_annotation(
        osp.abspath(osp.join(DIR, xml_rel_path)),
        valid_ids,
    )
    collected_tags.append(xml_tags)

# Left-to-right merge with `|`, performed only after every source has been
# processed. The returned objects must support `|` (e.g. sets, or dicts on
# Python 3.9+) -- NOTE(review): the actual type is defined in `retrievers`; confirm.
tags = collected_tags[0]
for more_tags in collected_tags[1:]:
    tags = tags | more_tags

# Write the merged tags to data/WebFlickr/tags.json (resolved relative to this
# script); mode "w" replaces any existing file.
# NOTE(review): json.dump cannot serialize a set, so `tags` is presumably a
# dict here -- confirm against the retrievers.
tags_json_path = osp.abspath(osp.join(DIR, "../../data/WebFlickr/tags.json"))
with open(tags_json_path, "w") as out_file:
    json.dump(tags, out_file)

