from ssl import OP_ENABLE_MIDDLEBOX_COMPAT


# Raw entity / relation strings -> dense integer ids. The same two tables are
# shared by every split, so an id refers to the same entity or relation in
# "train", "valid" and "test". Ids are handed out in first-seen order.
entity_dict = {}
relation_dict = {}

entity_count = 0
relation_count = 0


def _index_split(src_path, dst_path):
    """Rewrite one split of string triples as integer-id triples.

    Reads ``src_path`` line by line, expecting three tab-separated fields
    (``lhs``, ``rel``, ``rhs``) per line, and writes the corresponding
    ``lhs_id<TAB>rel_id<TAB>rhs_id`` line to ``dst_path``. Strings not seen
    before are added to the module-level ``entity_dict`` / ``relation_dict``
    with the next free id.

    Blank lines are skipped. A non-blank line that does not split into
    exactly three fields raises ``ValueError``.
    """
    # Open the input first so that a missing input does not truncate an
    # existing output file; ``with`` closes both handles even on error.
    with open(src_path, "r") as src, open(dst_path, "w") as dst:
        for raw_line in src:
            line = raw_line.strip()
            if not line:
                # e.g. a trailing empty line at the end of the file
                continue
            lhs, rel, rhs = line.split("\t")
            # ``len(table)`` is evaluated before insertion, so it is the next
            # unused id. Order (lhs, rhs, rel) fixes which id each string gets.
            lhs_id = entity_dict.setdefault(lhs, len(entity_dict))
            rhs_id = entity_dict.setdefault(rhs, len(entity_dict))
            rel_id = relation_dict.setdefault(rel, len(relation_dict))
            dst.write(f"{lhs_id}\t{rel_id}\t{rhs_id}\n")


# build dict
# Process order matters: train first, so ids introduced only by valid/test
# come after every training id.
for _split_name in ("train", "valid", "test"):
    _index_split(_split_name + ".txt", _split_name)
    # Keep the running totals in sync with the tables after each split.
    entity_count = len(entity_dict)
    relation_count = len(relation_dict)

