import json


def main(path, write_path, static_path):
    """Convert a file of annotated queries into CoNLL-style tagged lines.

    For every line of ``path``, only the text before the first ``|`` is kept.
    That text is handed to ``convert_to_conll`` and the returned block,
    followed by one blank line, is written to ``write_path``.

    Args:
        path: Input text file, one annotated query per line.
        write_path: Output file; created, or truncated if it already exists.
        static_path: JSON file of static attributes. It is parsed (so a
            missing or malformed file still raises), but its contents are
            not used by the conversion in this function.

    Raises:
        OSError: If any of the files cannot be opened.
        json.JSONDecodeError: If ``static_path`` does not contain valid JSON.
    """
    # Parse the static attributes before the output file is opened, so a bad
    # static file fails without truncating ``write_path``.
    with open(static_path, 'r') as static_file:
        json.load(static_file)

    # Context managers guarantee both handles are closed (and the output is
    # flushed) even if the conversion raises part-way through.
    with open(path, 'r') as source_file, open(write_path, 'w') as writer_file:
        for line in source_file:
            # Insert a space before the newline, then keep only the text
            # preceding the first '|'.
            query = line.replace('\n', ' \n').split("|")[0]
            # The extra '\n' leaves a blank line between consecutive queries.
            writer_file.write(convert_to_conll(query) + '\n')


def convert_to_conll(query):
    """Return ``query`` as CoNLL-style lines, one output line per word.

    The query is split on whitespace. A token that does not contain
    ``vids-`` is emitted unchanged as ``<token> NN I-NP O``. A token that
    does contain ``vids-`` is read by fixed position: characters 5-7 are a
    three-letter type code, and the characters from index 9 up to (but not
    including) the last one are the annotated text.
    NOTE(review): this presumably matches tokens shaped like
    ``vids-agg[Some_Text]`` -- confirm against the annotation format.

    The annotated text has underscores turned into spaces, parentheses
    removed, and is lower-cased; each resulting word gets its own line
    tagged with the label for the type code.

    Args:
        query: The annotated query string.

    Returns:
        The concatenated lines, each ending in a newline; an empty string
        when the query has no tokens.

    Raises:
        KeyError: If a ``vids-`` token carries a type code that is not in
            the label table.
    """
    tag_for_code = {
        "atr": "I-ATR",
        "agg": "I-AGG",
        "flt": "I-FLT",
        "flo": "I-FLO",
        "prw": "I-PRW",
        "num": "I-NUM",
        "ent": "I-ENT"
    }
    # One pass over the payload: '_' becomes ' ', '(' and ')' are dropped.
    cleanup = str.maketrans("_", " ", "()")

    rows = []
    for word in query.split():
        # Plain words carry the "outside" tag.
        if "vids-" not in word:
            rows.append(word + ' NN I-NP O\n')
            continue

        code = word[5:8]
        payload = word[9:-1]
        pieces = payload.translate(cleanup).lower().split()
        # Look the label up before emitting anything so that an unknown code
        # raises even when the payload is empty.
        tag = tag_for_code[code]
        rows.extend(f'{piece} NN I-NP {tag}\n' for piece in pieces)

    return "".join(rows)

# Script entry: this call sits at module top level, so it runs whenever the
# file is executed or imported. Arguments are input file, output file, and
# the static-attributes JSON, in that order.
main(
    "src/data/test_data/conll_format/online_delivary_h_annotated.txt",
    "src/data/test_data/conll_format/online_delivary_annon.txt",
    "src/data/fine_tuning/static_attributes.json",
)
# Alternative run over the student-performance file (disabled):
# main("src/data/test_data/conll_format/student_perf_wo_annotation.txt", "src/data/test_data/conll_format/student_perf.txt", "src/data/fine_tuning/static_attributes.json")