import copy
import os
import json
import pandas as pd
import random

# Root directory that contains the 'viquae_wikipedia' folder; the empty string
# resolves the KB path relative to the current working directory.
base_path = ''


def drop_docs_without_sections(kb):
    """Remove, in place, every entry of *kb* that has no section titles.

    Args:
        kb: Mapping of document id -> document dict. Each document dict must
            carry a 'section_titles' entry (a missing key raises KeyError).

    Returns:
        The list of removed document ids, in the mapping's iteration order.
    """
    # Collect the ids first: deleting from a dict while iterating over it
    # raises RuntimeError.
    empty_ids = [doc_id for doc_id, doc in kb.items() if not doc['section_titles']]
    for doc_id in empty_ids:
        del kb[doc_id]
    return empty_ids


# Prunes the 200k Wikipedia KB file in place: documents whose 'section_titles'
# list is empty are removed and the file is rewritten.
# NOTE(review): the original header said "For fast evaluation of retrieval
# performance with the test dataset, we randomly extract a 100k KB that
# contains the test dataset entities." No random sampling happens in this
# script -- presumably that step lives elsewhere; confirm.
if __name__ == '__main__':

    kb_path = os.path.join(base_path, 'viquae_wikipedia/viquae_kb_wiki_200k.json')

    # Use a context manager so the read handle is closed before the very same
    # path is reopened for writing below.
    with open(kb_path, 'r', encoding='utf-8') as f:
        document_kb = json.load(f)

    drop_docs_without_sections(document_kb)

    # Overwrites the input file with the pruned KB.
    with open(kb_path, 'w', encoding='utf-8') as f:
        json.dump(document_kb, f)
