import pandas as pd
# Load the tab-separated table. The loop below reads its 'Entry',
# 'EC number' and 'Sequence' columns.
data = pd.read_csv("data/split10.csv", sep='\t')

# structures: EC label -> {entry: sequence}. Each EC number contributes one
# label per hierarchy depth, e.g. "1.2.3.4" -> "1.-.-.-", "1.2.-.-",
# "1.2.3.-" and "1.2.3.4".
structures = {}
# all_entries: every entry id, in file order (one per row).
all_entries = []
# Walk the three columns in lockstep instead of indexing each row with .iloc.
for entry, EC_number, seq in zip(data['Entry'], data['EC number'], data['Sequence']):
    all_entries.append(entry)
    for EC in EC_number.split(";"):
        # Drop surrounding whitespace so "a; b" does not yield a label with a
        # leading space, and ignore empty fragments (e.g. a trailing ';').
        EC = EC.strip()
        if not EC:
            continue
        # Pad to four fields so a short EC still produces four-part labels.
        EC_split = (EC.split(".") + ['-'] * 4)[:4]
        # A distinct name for the depth counter avoids clobbering an outer
        # loop variable.
        for level in range(1, 5):
            e = '.'.join(EC_split[:level] + ['-'] * (4 - level))
            structures.setdefault(e, {})[entry] = seq

print(structures.keys())

import os
# For every EC label, create split10_blastdb/lib/EC/<label>/ and write three
# files into it:
#   seq.fa        - FASTA records (">entry" line, then the sequence line)
#   positive.ids  - entries filed under this label, one per line
#   negative.ids  - entries from all_entries not filed under this label
for key, members in structures.items():
    out_dir = f"split10_blastdb/lib/EC/{key}"
    os.makedirs(out_dir, exist_ok=True)

    with open(f"{out_dir}/seq.fa", "w") as fasta:
        fasta.writelines(
            f">{entry_id}\n{sequence}\n" for entry_id, sequence in members.items()
        )

    with open(f"{out_dir}/positive.ids", "w") as positives:
        positives.writelines(f"{entry_id}\n" for entry_id in members)

    with open(f"{out_dir}/negative.ids", "w") as negatives:
        # Keeps the order (and any repeats) of all_entries.
        negatives.writelines(
            f"{entry_id}\n" for entry_id in all_entries if entry_id not in members
        )
    # Disabled makeblastdb invocation, kept for reference. NOTE(review): its
    # paths omit the "lib/EC" component used by the files written above.
    # os.system(f"~/Downloads/ECPred/lib/ncbi-blast-2.7.1+/bin/makeblastdb -in split10_blastdb/{key}/seq.fa -dbtype prot -out split10_blastdb/{key}/blast/{key} -parse_seqids")
            