import random
import math


def read_centers(centers_file_path):
    """Return the lines of the centers file with surrounding whitespace removed.

    Args:
        centers_file_path: Path of the text file to read (opened in text
            mode with the platform default encoding).

    Returns:
        A list with one string per line of the file, in file order. Each
        line is stripped of leading and trailing whitespace; blank lines are
        kept as empty strings.
    """
    stripped_lines = []
    with open(centers_file_path, 'r') as handle:
        for raw_line in handle:
            stripped_lines.append(raw_line.strip())
    return stripped_lines


def generate_cpred_data(orig_seqs, clusters, size=-1, lb=-1):
    """Build '|'-joined cluster examples paired with their original sequence.

    Each cluster is shuffled and, when it holds more than ``size`` members,
    cut into consecutive chunks of at most ``size`` members. Every chunk (or
    the whole cluster when it is not cut) that has at least ``lb`` members
    becomes one example of the form ``'m1|m2|...:orig_seq'``.

    Args:
        orig_seqs: Iterable of original sequences (strings), one per cluster.
        clusters: Iterable of clusters; each cluster is a collection of
            strings. The clusters are not modified. Pairs are formed with
            ``zip``, so surplus entries in the longer input are ignored.
        size: Maximum number of members per example. Must be given and be
            at least 1.
        lb: Minimum number of members a chunk needs to be kept. Must be
            given (any value other than -1).

    Returns:
        A tuple ``(data_list, ground_truth_list)`` of equal length, where
        ``ground_truth_list[i]`` is the original sequence that
        ``data_list[i]`` was built from.

    Raises:
        ValueError: If ``size`` or ``lb`` is left at -1, or ``size`` is not
            positive.
    """
    if size == -1 or lb == -1:
        raise ValueError('Size or lower bound is not defined')
    if size < 1:
        # A zero step would fail inside range() and a negative one would
        # silently drop every cluster; report the real problem instead.
        raise ValueError('Size must be a positive integer')

    data_list = []
    ground_truth_list = []

    for orig_seq, cluster in zip(orig_seqs, clusters):
        # Shuffle a private copy so the caller's cluster keeps its order
        # (and so immutable clusters such as tuples are accepted).
        shuffled = list(cluster)
        random.shuffle(shuffled)

        if len(shuffled) > size:
            chunks = [shuffled[i:i + size] for i in range(0, len(shuffled), size)]
        else:
            # Small clusters are used whole, as a single chunk.
            chunks = [shuffled]

        for chunk in chunks:
            if len(chunk) >= lb:
                data_list.append('|'.join(chunk) + ':' + orig_seq)
                ground_truth_list.append(orig_seq)

    return data_list, ground_truth_list