import json
import re
import pandas as pd
import yaml

# Run configuration: the model names and persona settings used below are all
# read from this YAML file.
with open('config.yml', 'r') as f:
    config = yaml.safe_load(f)

# Result table; the scoring loop further down appends one row per prediction.
df = pd.DataFrame(columns=[
    'worker_id', 'question_text', 'model1', 'model2', 'eval_model',
    'fluency', 'helpfulness', 'ease', 'helpfulness_freetext',
    'no_of_turns', 'accuracy',
])

model1 = config['model1']
model2 = config['model2']

# The 'general' persona leaves file names untouched; any other persona is
# appended as '_<persona>'.
if config['persona'] == 'general':
    persona_suffix = ''
else:
    persona_suffix = '_' + config['persona']

# Conversation log for this model pair / persona.
conv_file_name = f'../results/conversation_{model1}_{model2}{persona_suffix}_prompt-1.csv'
conv_data = pd.read_csv(conv_file_name)

# The conversation persona is only spelled out in the predictions file name
# when it differs from the evaluation persona (and is not 'general').
conv_persona = config['conv_persona']
if conv_persona in (config['persona'], 'general'):
    conv_persona_suffix = ''
else:
    conv_persona_suffix = '_' + conv_persona

eval_model = config['eval_model']
file_name = (
    f'../results/predictions_{model1}{conv_persona_suffix}_{model2}'
    f'_{eval_model}{persona_suffix}_prompt-1.json'
)
with open(file_name, 'r') as f:
    predictions = json.load(f)

# A single digit in parentheses preceded by a space, e.g. "Fluency (4): ...".
# Raw string: the previous non-raw literal relied on invalid escape sequences.
_PAREN_SCORE_RE = re.compile(r' \((\d)\)')


def parse_line(line):
    """Extract the integer score from one line of an evaluator prediction.

    Three layouts are handled, in this order:

    * no ``':'`` in the line: the second-to-last character of the stripped
      line is taken as the score (e.g. ``"Score 4."``);
    * a ``':'`` plus a parenthesised single digit such as ``" (4)"``: that
      digit is the score;
    * otherwise: whatever follows the last ``':'`` must be an integer.

    Args:
        line: One line of prediction text.

    Returns:
        The score as an ``int``.

    Raises:
        ValueError: The selected text is not an integer.
        IndexError: The line has no ``':'`` and fewer than two characters
            once stripped.
    """
    if ':' not in line:
        return int(line.strip()[-2])

    match = _PAREN_SCORE_RE.search(line)
    if match is not None:
        return int(match.group(1))

    return int(line.split(':')[-1].strip())

def _ask_int(prompt):
    """Keep asking on stdin until the reply parses as an integer."""
    while True:
        try:
            return int(input(prompt))
        except ValueError:
            # A typo must not abort the run and throw away the rows so far.
            print('Please enter a whole number.')


# Labels that may precede the free-text helpfulness answer; they are matched
# case-insensitively and removed.
_FREETEXT_LABELS = ('helpfulness:', 'helpfulness (free-form): ')

for idx, pred in enumerate(predictions):
    # Collapse blank lines, then work line by line: the first three lines are
    # expected to hold the fluency / helpfulness / ease scores.
    pred_text = pred['prediction'].strip().replace('\n\n', '\n').split('\n')
    try:
        fluency, helpful, ease = pred_text[:3]
        print(fluency)
        print(helpful)
        print(ease)
        fluency = parse_line(fluency)
        helpful = parse_line(helpful)
        ease = parse_line(ease)
    except (ValueError, IndexError):
        # Fewer than three lines, or a line parse_line could not turn into a
        # number: show the raw text and fall back.
        print(pred_text)
        if not ''.join(pred_text).strip():
            # Empty prediction: record zeros.
            fluency, helpful, ease = 0, 0, 0
        else:
            # Non-empty but unparseable: let the operator enter the scores.
            fluency = _ask_int('fluency: ')
            helpful = _ask_int('helpful: ')
            ease = _ask_int('ease: ')

    # Everything after the three score lines is the free-text answer.
    helpful_text = '\n'.join(pred_text[3:])
    print(helpful_text)
    lowered = helpful_text.lower()
    for label in _FREETEXT_LABELS:
        if lowered.startswith(label):
            helpful_text = helpful_text[len(label):]
            break
    helpful_text = helpful_text.strip()

    # number of turns
    worker_id = pred['worker_id']
    line = conv_data[conv_data['worker_id'] == worker_id]
    # NOTE(review): eval() executes whatever is stored in the CSV cell. Only
    # run this on trusted files; if the cells are plain Python literals,
    # ast.literal_eval would be the safe replacement.
    no_of_turns = len(eval(line['lm_responses'].values[0]))
    print(no_of_turns)

    # accuracy: map the gold answer text to its choice letter and compare it
    # with the letter recorded for the user.
    answer_text = line['answer_text'].values[0]
    golden_dict = {
        line['choice_a'].values[0]: 'A',
        line['choice_b'].values[0]: 'B',
        line['choice_c'].values[0]: 'C',
        line['choice_d'].values[0]: 'D',
    }
    golden = golden_dict[answer_text]
    answer = line['user_answer'].values[0]
    if answer not in ['A', 'B', 'C', 'D']:
        # No usable letter was recorded: show the context and ask the operator.
        print(golden_dict)
        print(eval(line['user_queries'].values[0]))  # see eval() note above
        print(line['choice'].values[0])
        # Normalise so that 'a' or ' A ' is not silently scored as wrong.
        answer = input('Answer: ').strip().upper()
    acc = int(golden == answer)

    # NOTE(review): the first field (stored in the 'worker_id' column) is the
    # 1-based position in `predictions`, not pred['worker_id'] - confirm this
    # is intended.
    row = [idx+1, pred['question'], config['model1'], config['model2'], config['eval_model'], fluency, helpful, ease, helpful_text, no_of_turns, acc]
    df.loc[len(df.index)] = row

# Output path: same naming scheme as the predictions JSON, with a .csv
# extension. The conversation persona only appears in the name when it differs
# from the evaluation persona; 'general' never adds a suffix.
persona_suffix = '_'+config['persona'] if 'general'!=config['persona'] else ''
if config['persona'] == config['conv_persona']:
    conv_persona_suffix = ''
else:
    conv_persona_suffix = '_'+config['conv_persona'] if 'general'!=config['conv_persona'] else ''
file_name = '../results/predictions_{model1}{conv_persona}_{model2}_{eval_model}{persona}_prompt-1.csv'.format(
    model1=config['model1'],
    model2=config['model2'],
    eval_model=config['eval_model'],
    persona=persona_suffix,
    conv_persona=conv_persona_suffix,
    )
df.to_csv(file_name, index=False)

