import os
from tqdm import tqdm
import random


def open_data(data_suffixes):
    """Load sentiment files and return their lines as shuffled labelled examples.

    For every suffix, the file ``<data_dir>/<dataset>/sentiment.<suffix>`` is
    read and each of its lines is prefixed with ``__label__<i>`` followed by a
    tab, where ``<i>`` is the position of the suffix within ``data_suffixes``
    (so the order of the suffixes determines the label numbers).

    ``data_dir`` and ``dataset`` are not parameters: they are looked up as
    module-level globals and must be defined before this function is called.

    Args:
        data_suffixes: Sequence of file-name suffixes, one per class, in label
            order (e.g. ``['dev.0', 'dev.1']``).

    Returns:
        A list of newline-terminated strings, shuffled in random order.

    Raises:
        OSError: If one of the input files cannot be opened.
    """
    lines = []
    # Wrap the sequence itself (not the enumerate object) so that tqdm can
    # determine the total and show real progress.
    for index, data_suffix in enumerate(tqdm(data_suffixes)):
        path = os.path.join(data_dir, dataset, 'sentiment.{}'.format(data_suffix))
        with open(path, 'r') as file:
            for line in file:
                # A final line without a trailing newline would otherwise be
                # glued to whichever example follows it after shuffling,
                # because writelines() does not insert separators.
                if not line.endswith('\n'):
                    line += '\n'
                lines.append('__label__{}\t{}'.format(index, line))
    random.shuffle(lines)
    return lines


if __name__ == '__main__':
    # `dataset` and `data_dir` are read as module-level globals by open_data().
    dataset = 'amazon'
    max_length = 43
    data_dir = '.'
    data_suffixes = ['dev.0', 'dev.1', 'train.0', 'train.1', 'test.1', 'test.0']

    # For each split, merge the two per-class files (suffix .0 -> label 0,
    # suffix .1 -> label 1) into one shuffled file next to the inputs.
    dev_lines = open_data(['dev.0', 'dev.1'])
    with open(os.path.join(data_dir, dataset, 'sentiment.dev_for_fastText.txt'), 'w') as file:
        file.writelines(dev_lines)

    train_lines = open_data(['train.0', 'train.1'])
    with open(os.path.join(data_dir, dataset, 'sentiment.train_for_fastText.txt'), 'w') as file:
        # Write the train split itself (previously the dev lines were written here).
        file.writelines(train_lines)

    test_lines = open_data(['test.0', 'test.1'])
    with open(os.path.join(data_dir, dataset, 'sentiment.test_for_fastText.txt'), 'w') as file:
        # Write the test split itself (previously the dev lines were written here).
        file.writelines(test_lines)
