import os
import json
import tqdm
import datetime
import argparse
from pycocotools.coco import COCO


# Dataset-level metadata written under the "info" key of the generated
# COCO-format json file.
INFO = dict(
    description="COCO format json: Self-train",
    url="",
    version="1.0",
    year=2025,
    contributor="Xingyu Feng",
    # Evaluated once, when the module is loaded (naive local time).
    date_created=datetime.datetime.now().isoformat(),
)

# License table written under the "licenses" key of the generated json file.
LICENSES = [
    dict(
        id=1,
        name="CC-BY 4.0 License",
        url="https://creativecommons.org/licenses/by/4.0/deed.en",
    ),
]

# Category table written under the "categories" key of the generated json
# file: a single category named 'fg' with id 1.
CATEGORIES = [
    dict(id=1, name='fg', supercategory='fg'),
]


def filter_predictions(predictions, threshold):
    """Keep confident predictions and add the fields of a COCO annotation.

    Args:
        predictions: Iterable of prediction dicts. Each dict must provide
            ``score``, ``bbox`` (the product of its last two entries is used
            as the area) and an RLE ``segmentation`` dict with a ``size``
            entry.
        threshold: Minimum confidence score, inclusive, for a prediction to
            be kept.

    Returns:
        A list of new annotation dicts, in input order, each carrying the
        original prediction fields plus ``id`` (consecutive, starting at 1),
        ``area``, ``iscrowd``, ``width`` and ``height``. The input dicts are
        left untouched.

    Raises:
        KeyError: If a kept prediction lacks ``bbox`` or ``segmentation``,
            or any prediction lacks ``score``.
    """
    kept = []
    for pred in predictions:
        if pred['score'] >= threshold:
            # pycocotools RLE masks store their size as [height, width].
            mask_size = pred['segmentation']['size']
            ann = dict(pred)
            ann['id'] = len(kept) + 1
            ann['area'] = pred['bbox'][-1] * pred['bbox'][-2]
            ann['iscrowd'] = 0
            ann['width'] = mask_size[1]
            ann['height'] = mask_size[0]
            kept.append(ann)
    return kept


if __name__ == "__main__":
    # Command-line options; every default reproduces the previous hard-coded
    # behaviour of this script.
    parser = argparse.ArgumentParser(description='Generate json files for the self-training')
    # NOTE: --detectron2-out-dir is accepted for compatibility but is not
    # read anywhere below.
    parser.add_argument('--detectron2-out-dir', type=str,
                        default='coco_train17_outputs',
                        help='Path to model predictions splits dir')
    parser.add_argument('--coco-ann-path', type=str, default='/data/xxx/datasets/imagenet/annotations/imagenet_val_cls_agnostic_gt.json',
                        help='COCO-format annotation file whose image list is reused')
    parser.add_argument('--threshold', type=float, default=0.2,
                        help='Confidence score thresholds')
    # Alternative predictions file used previously:
    # /data/xxx/segmentation/CutLER/coler_train/imagenet_val/inference/coco_instances_results.json
    parser.add_argument('--res-json-path', type=str,
                        default='/data/xxx/segmentation/CutLER/coler_eval/imagenet_val/inference_1/coco_instances_results.json',
                        help='Path to the json file with model predictions')
    parser.add_argument('--save-path', type=str,
                        default='pseudo_labels_self_train/coler_self_train_r1.json',
                        help='Path of the self-training annotation file to write')
    args = parser.parse_args()

    with open(args.res_json_path, "r") as f:
        predictions = json.load(f)

    new_anns = filter_predictions(
        tqdm.tqdm(predictions, desc='Filtering low-confidence predictions'),
        args.threshold,
    )

    # Only the image list of the reference annotation file is reused.
    ann_coco = COCO(args.coco_ann_path)

    new_dataset = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": ann_coco.dataset['images'],
        "annotations": new_anns
    }

    # Make sure the destination directory exists before writing; a bare
    # file name has an empty dirname, hence the "." fallback.
    os.makedirs(os.path.dirname(args.save_path) or ".", exist_ok=True)

    # save annotation file
    with open(args.save_path, "w") as f:
        json.dump(new_dataset, f, indent=2)
    # Report only once the file has been closed (and therefore flushed).
    print(f"Dump {args.save_path}")

    print("Done: {} images; {} anns.".format(len(new_dataset['images']), len(new_dataset['annotations'])))
