import copy
import os
import json
import pandas as pd

base_path = ''

# Loading the full KB every time we debug is time-consuming,
# so we build a second, much smaller KB for debugging.
def build_debug_kb(document_kb, urls):
    """Return the subset of ``document_kb`` keyed by the given ``urls``.

    Args:
        document_kb: Mapping from URL to its KB entry.
        urls: Iterable of URLs to keep. Duplicates are allowed; each URL
            appears once in the result, at the position of its first
            occurrence.

    Returns:
        A new dict mapping each requested URL to the corresponding entry of
        ``document_kb``. Entries are referenced, not copied.

    Raises:
        KeyError: If any requested URL is absent from ``document_kb``. The
            message reports how many are missing and shows a few examples.
    """
    debug_doc_kb = {}
    missing = []
    for url in urls:
        if url in document_kb:
            debug_doc_kb[url] = document_kb[url]
        else:
            missing.append(url)

    if missing:
        # Fail loudly (same exception type as a plain dict lookup), but report
        # every missing URL at once instead of only the first one hit.
        raise KeyError(
            f"{len(missing)} wikipedia_url value(s) not found in the KB, "
            f"e.g. {missing[:5]}"
        )
    return debug_doc_kb


if __name__ == '__main__':

    # Only the first 100 queries of each split are needed, so let pandas stop
    # parsing after 100 rows instead of loading the whole CSV and slicing.
    train_query_df = pd.read_csv(os.path.join(base_path, 'train_clean.csv'), nrows=100)
    val_query_df = pd.read_csv(os.path.join(base_path, 'val_clean.csv'), nrows=100)

    # Use a context manager so the (large) KB file handle is closed promptly.
    with open(
        os.path.join(base_path, 'encyclopedic_kb_wiki/encyclopedic_kb_wiki_cleaned_table.json'),
        'r',
        encoding='utf-8',
    ) as f:
        document_kb = json.load(f)

    # Train URLs first, then validation URLs, matching the original insertion order.
    query_urls = (
        train_query_df['wikipedia_url'].tolist()
        + val_query_df['wikipedia_url'].tolist()
    )
    debug_doc_kb = build_debug_kb(document_kb, query_urls)

    with open(
        os.path.join(base_path, 'encyclopedic_kb_wiki/encyclopedic_kb_wiki_cleaned_table_debug.json'),
        'w',
        encoding='utf-8',
    ) as f:
        json.dump(debug_doc_kb, f)
