import pandas as pd
import os
import json

# Language pairs converted for each campaign, keyed by the two-digit year that
# appears in both the folder name ('wmt15') and the file names ('newstest2015').
WMT_LANG_PAIRS = {
    15: ['cs-en', 'de-en', 'ru-en', 'fi-en'],
    16: ['cs-en', 'de-en', 'ru-en', 'fi-en', 'ro-en', 'tr-en'],
}


def select_wmt_config(use_16):
    """Return ``(suffix, folder_path, langs)`` for the selected campaign.

    Args:
        use_16: When true, select the 16 campaign; otherwise the 15 campaign.

    Returns:
        A tuple of the two-digit year, the data sub-folder name
        (``'wmt15'`` / ``'wmt16'``) and a fresh list of language pairs.
    """
    # The previous ternaries were inverted: ``use_16 = True`` selected the
    # wmt15 folder, suffix and language list.
    suffix = 16 if use_16 else 15
    return suffix, 'wmt{}'.format(suffix), list(WMT_LANG_PAIRS[suffix])


def read_lines(path):
    """Return every line of the text file at *path*, line endings included."""
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(path, 'r', encoding='utf-8') as file:
        return file.readlines()


def build_records(lines_human, lines_system, lines_refs, system_name):
    """Combine three line-aligned lists into the nested output dictionary.

    Args:
        lines_human: One human score per line, parseable by ``float``.
        lines_system: One system output sentence per line.
        lines_refs: One reference sentence per line.
        system_name: Key under which the system output and its scores are
            stored inside each record's ``'system'`` entry.

    Returns:
        A dict keyed by the stringified line index. Each value holds the
        reference under ``'references_sentences'`` and, under
        ``'system'[system_name]``, the ``'generated_sentence'`` and a
        ``'scores'`` dict with the ``'human'`` float.

    Raises:
        ValueError: If the three lists differ in length, or a score line is
            not a valid float.
    """
    # Raise instead of assert: asserts are removed under ``python -O`` and a
    # misaligned corpus would then be silently truncated or crash mid-loop.
    if not (len(lines_human) == len(lines_system) == len(lines_refs)):
        raise ValueError(
            'Misaligned input files: {} human scores, {} system outputs, '
            '{} references'.format(len(lines_human), len(lines_system), len(lines_refs)))

    records = {}
    aligned = zip(lines_human, lines_system, lines_refs)
    for index, (score, hypothesis, reference) in enumerate(aligned):
        # NOTE: sentences are stored exactly as read, trailing newline
        # included, to keep the output identical to what consumers of the
        # existing JSON files already receive.
        records[str(index)] = {
            'system': {
                system_name: {
                    'generated_sentence': hypothesis,
                    'scores': {'human': float(score.strip())},
                },
            },
            'references_sentences': reference,
        }
    return records


def convert_campaign(use_16, data_root='data'):
    """Write one ``<lang>_formated.json`` file per language pair.

    For every language pair of the selected campaign, reads the
    ``DAseg.newstest20XX.{human,mt-system,reference}.<lang>`` files from
    ``<data_root>/<folder_path>`` and dumps the combined records as JSON
    into the same folder.

    Args:
        use_16: Forwarded to :func:`select_wmt_config`.
        data_root: Directory containing the ``wmt15`` / ``wmt16`` folders.

    Raises:
        OSError: If an input file cannot be read or the output cannot be
            written.
        ValueError: If the three input files of a language pair are not
            line-aligned or contain a non-numeric score.
    """
    suffix, folder_path, langs = select_wmt_config(use_16)
    folder = os.path.join(data_root, folder_path)

    for lang in langs:
        lines_human, lines_system, lines_refs = (
            read_lines(os.path.join(
                folder, 'DAseg.newstest20{}.{}.{}'.format(suffix, part, lang)))
            for part in ('human', 'mt-system', 'reference')
        )
        dic_lines = build_records(lines_human, lines_system, lines_refs, folder_path)

        # json.dump escapes non-ASCII by default, so the output is plain ASCII.
        with open(os.path.join(folder, '{}_formated.json'.format(lang)), 'w') as file:
            json.dump(dic_lines, file)


if __name__ == '__main__':
    use_16 = True
    convert_campaign(use_16)
