import os
import seaborn as sns
import pandas as pd
import re

# Names of the datasets this script knows about; the first one is the one
# processed below.
datasets = [
    'bookorder',
    'retweet',
]
selected_dataset = datasets[0]

# Directories holding the per-mark MAE-E report files of the two procedures
# compared below (labelled 'TE' and 'ET' in the resulting frame).
dir_mae_e_by_te, dir_mae_e_by_et = 'mae_e_by_time_event', 'mae_e_data'

def parse_mark_and_q_values_from_a_file(mae_e_by_te_infomation_file):
    """Parse the per-mark Q values out of an MAE-E report file.

    The indexing below relies on this file layout:

    * the first line contains the mark count written as ``<N> marks``;
    * starting with the third line, every second line holds the
      ``Q<k>: <float>`` entries belonging to one mark.

    Args:
        mae_e_by_te_infomation_file: Path of the text file to parse.

    Returns:
        A ``(dict_te, number_of_marks)`` tuple. ``dict_te`` maps
        ``'mark <i>'`` (``i`` numbers the parsed records from 0) to a dict of
        Q label -> float value; ``number_of_marks`` is the count announced
        on the first line.

    Raises:
        AssertionError: If the number of parsed records differs from the
            announced mark count.
        IndexError: If the file is empty or its first line has no
            ``<N> marks`` token.
    """
    # The context manager closes the file even if reading fails.
    with open(mae_e_by_te_infomation_file, 'r') as report:
        lines = list(report)

    mark_token = re.findall(r'\d*\smarks', lines[0])[0]
    number_of_marks = int(mark_token.split(' ')[0])

    dict_te = {}
    # Records sit on every second line, beginning with the third one.
    for idx, line in enumerate(lines[2::2]):
        q_values = {}
        for each_q_value in re.findall(r'Q\d*\:\s\d*\.\d*', line):
            q_label, value = (part.strip() for part in each_q_value.split(':'))
            q_values[q_label] = float(value)

        dict_te[f'mark {idx}'] = q_values

    # Raised explicitly (rather than via ``assert``) so the consistency check
    # still runs when Python is started with -O.
    if len(dict_te) != number_of_marks:
        raise AssertionError(
            f'expected {number_of_marks} marks but parsed {len(dict_te)} '
            f'records from {mae_e_by_te_infomation_file}'
        )
    return dict_te, number_of_marks


# Parse the per-mark report of the selected dataset for both procedures.
# The two reports share a file name and live in different directories.
mae_e_by_te_infomation_file = os.path.join(dir_mae_e_by_te, f'{selected_dataset}_mae_e_by_marks.txt')
dict_te, number_of_marks_te = parse_mark_and_q_values_from_a_file(mae_e_by_te_infomation_file)
mae_e_by_et_infomation_file = os.path.join(dir_mae_e_by_et, f'{selected_dataset}_mae_e_by_marks.txt')
dict_et, number_of_marks_et = parse_mark_and_q_values_from_a_file(mae_e_by_et_infomation_file)

# Both reports must describe the same number of marks; raised explicitly so
# the check is not stripped under -O.
if number_of_marks_te != number_of_marks_et:
    raise AssertionError(
        f'mark count mismatch: TE has {number_of_marks_te}, '
        f'ET has {number_of_marks_et}'
    )

# One column per mark, one row per Q label.
df_te = pd.DataFrame.from_dict(dict_te)
df_et = pd.DataFrame.from_dict(dict_et)

# Stack both procedures and move the Q labels into the 'index' column.
df = pd.concat((df_te, df_et))
df = df.reset_index()

# Label each row with its procedure. The counts come from the frames
# themselves so any number of Q rows per procedure is supported.
df['Procedure'] = ['TE'] * len(df_te) + ['ET'] * len(df_et)

# Reshape to long format over every parsed mark column instead of a
# hard-coded pair.
mark_columns = [f'mark {mark_idx}' for mark_idx in range(number_of_marks_te)]
df = pd.melt(df, id_vars=['index', 'Procedure'], value_vars=mark_columns)

# Debug marker kept so the script's stdout is unchanged.
print('test')