import pandas as pd
# Builds a tab-separated table joining each domain listed in the CATH
# domain-list file with its amino-acid sequence from the FASTA file.


def parse_fasta(text):
    """Parse FASTA text into a ``{domain_id: sequence}`` mapping.

    The domain id is taken from the header: the last ``|``-separated field,
    truncated at the first ``/`` (for example ``cath|4_3_0|1abcA01/1-90``
    yields ``1abcA01``). Sequences that wrap over several lines are joined
    into a single string.

    Raises:
        ValueError: if a record has a header but no sequence lines.
    """
    sequences = {}
    for record in text.split(">"):
        record = record.strip()
        if not record:
            # Text before the first ">" or whitespace-only fragments.
            continue
        header, *seq_lines = record.splitlines()
        if not seq_lines:
            raise ValueError(f"FASTA record has no sequence: {header!r}")
        domain_id = header.split("|")[-1].split("/")[0]
        sequences[domain_id] = "".join(line.strip() for line in seq_lines)
    return sequences


def parse_domain_list(lines):
    """Parse domain-list lines into ``(domain_id, code)`` tuples.

    Each data line is whitespace-delimited; the first field is the domain id
    and fields 2-5 are joined with ``.`` to form the code. Blank lines and
    lines whose first field starts with ``#`` are skipped.
    """
    rows = []
    for line in lines:
        # split() with no argument handles runs of spaces, tabs and the
        # trailing newline in one go.
        fields = line.split()
        if not fields or fields[0].startswith("#"):
            continue
        rows.append((fields[0], ".".join(fields[1:5])))
    return rows


def build_table(domain_lines, fasta_text):
    """Return a DataFrame with one row per listed domain.

    Columns are ``Entry`` (domain id), ``EC number`` (the dotted code built
    from fields 2-5 of the domain list) and ``Sequence``.

    Raises:
        KeyError: if a listed domain has no sequence in the FASTA text.
    """
    sequences = parse_fasta(fasta_text)
    records = parse_domain_list(domain_lines)

    missing = [domain_id for domain_id, _ in records if domain_id not in sequences]
    if missing:
        raise KeyError(
            f"{len(missing)} domain(s) have no sequence in the FASTA data, "
            f"first missing: {missing[0]}"
        )

    # NOTE(review): the "EC number" column actually holds the four-level code
    # from the domain list; the label is kept unchanged because downstream
    # consumers presumably rely on it.
    return pd.DataFrame({
        "Entry": [domain_id for domain_id, _ in records],
        "EC number": [code for _, code in records],
        "Sequence": [sequences[domain_id] for domain_id, _ in records],
    })


def main():
    """Read the two input files and write ``cath_S35.csv`` (tab-separated)."""
    with open("cath-classification-data/cath-domain-list-S35.txt") as handle:
        domain_lines = handle.readlines()
    with open("sequence-data/cath-domain-seqs-S35.fa") as handle:
        fasta_text = handle.read()

    df = build_table(domain_lines, fasta_text)
    df.to_csv("cath_S35.csv", sep="\t", index=False)


if __name__ == "__main__":
    main()