import os
import json

# Reduce the filtered annotation file to {image_id, caption} records, ordered
# by image id, and write them out as filter_cap.json.
with open('gpt4score/filter_data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

annotations = data['annotations']

# Sort on int(image_id) so the order is numeric even if the ids are stored as
# strings (NOTE(review): the id type in the input file is not visible here).
selected_annotations = sorted(annotations, key=lambda x: int(x['image_id']))

# Keep only the two fields written to the output; any other keys on an
# annotation are dropped.
new = [
    {"image_id": item["image_id"], "caption": item["caption"]}
    for item in selected_annotations
]
new_data = {'annotations': new}

# exist_ok=True: without it a re-run (or a pre-existing output directory)
# raises FileExistsError before anything is written.
os.makedirs('/path/to/test_dataset/gpt4_200_wo_clus/cc_sbu_align/', exist_ok=True)
with open('/path/to/test_dataset/gpt4_200_wo_clus/cc_sbu_align/filter_cap.json', 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps non-ASCII caption text readable in the file.
    json.dump(new_data, f, indent=4, ensure_ascii=False)
