import random

from utils.data_functions import load_data_from_file, write_data_to_file
from utils.helper_functions import filter_string

# NOTE(review): save_flag is not read anywhere in the visible code; the
# write_data_to_file calls further down run regardless of its value.
# Confirm whether it was meant to gate them.
save_flag = True
# Tag interpolated into the names of the data files this script loads and
# rewrites (e.g. data/train_data_<cluster_case>.txt).
cluster_case = 'SC'

def filter_file(cpred_list, *, read_filter=None):
    """Clean the reads of every ``read|read|...:rest`` entry in ``cpred_list``.

    For each entry, the text before the first ``:`` is split on ``|``, every
    piece is passed through the read filter, and the pieces are re-joined
    with ``|``. Everything from the first ``:`` onward is kept verbatim, so
    additional ``:`` characters in the remainder are preserved. An entry
    without any ``:`` is treated as reads only and gets no separator added.

    Args:
        cpred_list: Mutable sequence of entry strings; it is updated in place.
        read_filter: Optional callable applied to each read. When omitted,
            the project helper ``filter_string`` is used.

    Returns:
        The same ``cpred_list`` object, with every entry rewritten.
    """
    if read_filter is None:
        read_filter = filter_string

    for i, cpred_example in enumerate(cpred_list):
        # partition() splits on the first ':' only and never raises: the
        # remainder keeps any later ':' and is empty when there is no ':'.
        prompt, separator, remainder = cpred_example.partition(':')
        cleaned_reads = [read_filter(read) for read in prompt.split('|')]
        cpred_list[i] = '|'.join(cleaned_reads) + separator + remainder

    return cpred_list

# Every dataset is cleaned in place: the filtered lines are written back to
# the same path they were loaded from, so each path is named once here.
STARCODE_TEST_PATH = f'data/starcode_test_cpred_data_{cluster_case}.txt'
TRAIN_PATH = f'data/train_data_{cluster_case}.txt'
VAL_PATH = f'data/val_data_{cluster_case}.txt'

# All three inputs are loaded before the first file is rewritten.
starcode_test_cpred = load_data_from_file(STARCODE_TEST_PATH)
train_cpred = load_data_from_file(TRAIN_PATH)
val_cpred = load_data_from_file(VAL_PATH)

# Filter each dataset and immediately overwrite its source file.
starcode_test_cpred = filter_file(starcode_test_cpred)
write_data_to_file(STARCODE_TEST_PATH, starcode_test_cpred)

train_cpred = filter_file(train_cpred)
write_data_to_file(TRAIN_PATH, train_cpred)

val_cpred = filter_file(val_cpred)
write_data_to_file(VAL_PATH, val_cpred)