from collections import Counter
from tqdm import tqdm

def split_by_gender(lines, max_length):
    """Partition tab-separated ``content<TAB>label`` lines by their label.

    Each non-blank line is split on tabs; the first field is the content and
    the second is parsed as an integer label. A line is kept only when its
    label is not 2 and its content has strictly fewer than ``max_length``
    space-separated tokens.

    Args:
        lines: Iterable of raw text lines (trailing newlines are fine, since
            ``int`` ignores surrounding whitespace in the label field).
        max_length: Exclusive upper bound on the number of tokens obtained
            from ``content.split(' ')``.

    Returns:
        A ``(one_lines, zero_lines)`` tuple of lists of content strings:
        ``one_lines`` holds the contents labelled 1, ``zero_lines`` holds
        every other kept content (any label that is neither 1 nor 2).

    Raises:
        IndexError: If a non-blank line has no tab-separated second field.
        ValueError: If the label field is not an integer.
    """
    one_lines = []
    zero_lines = []
    for line in lines:
        # Blank lines (e.g. a trailing empty line) carry no record; skip them
        # instead of failing on the missing label field.
        if not line.strip():
            continue
        fields = line.split('\t')
        content = fields[0]
        gender = int(fields[1])
        if gender == 2 or len(content.split(' ')) >= max_length:
            continue
        if gender == 1:
            one_lines.append(content)
        else:
            zero_lines.append(content)
    return one_lines, zero_lines


def write_lines(path, lines):
    """Write every entry of ``lines`` to ``path``, one entry per line.

    The parent directory of ``path`` is created first when it does not exist,
    so the script does not fail on a fresh checkout without the output folder.

    Args:
        path: Destination file path; an existing file is overwritten.
        lines: Iterable of strings, written with a trailing newline each.
    """
    import os

    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, 'w') as file:
        file.writelines('{}\n'.format(line) for line in lines)


if __name__ == '__main__':
    max_length = 115
    file_base = '{}.fader.with_cat.proc.{}0000'
    for name in tqdm(['valid', 'test', 'train']):
        for index in tqdm([4, 6, 8]):
            filename = file_base.format(name, index)
            with open(filename, 'r') as file:
                lines = file.readlines()

            one_lines, zero_lines = split_by_gender(lines, max_length)

            # Label 1 goes to the "female" file, every other kept label to
            # the "male" file; output names are kept exactly as before.
            write_lines('processed/' + filename + 'processed_female', one_lines)
            write_lines('processed/' + filename + 'processed_male', zero_lines)

            print("STATS : -------------")
            print("Filename {}".format(filename))
            print("Before nb_lines : {}".format(len(lines)))
            print("After nb_lines : {}".format(len(one_lines) + len(zero_lines)))
