import os
import sys
import numpy as np
import pandas as pd
import json
import markdown

def dump_markdown_mmlu_gsm(ft_json, orig_json):
    """Build a Markdown table comparing original and finetuned responses.

    Args:
        ft_json: Mapping from question text to a ``(response, answer)`` pair
            for the finetuned model. The displayed text is read from
            ``response[0][-1]['content']``.
        orig_json: Mapping with the same keys and value layout for the
            original model; the second element of each pair is ignored.

    Returns:
        The table as a string: a header row, a separator row, then one row
        per question of ``ft_json`` (in its iteration order). Every line,
        including the last, ends with a newline.

    Raises:
        KeyError: If a question of ``ft_json`` is missing from ``orig_json``.
    """
    def _cell(value):
        # Make a value safe to embed in a single table cell: newlines would
        # end the row, pipes would split the cell, and backticks/asterisks
        # would be interpreted as Markdown markup.
        text = str(value)
        for old, new in (('\n', '<br />'), ('|', ''), ('```', '"""'), ('*', 'x')):
            text = text.replace(old, new)
        return text

    lines = [
        '| Question | Original Response | Finetuned Response | Answer |',
        '|---------|-------------------|--------------------|--------|',
    ]
    for question, (ft_response, answer) in ft_json.items():
        orig_response, _ = orig_json[question]
        cells = (
            question,
            orig_response[0][-1]['content'],
            ft_response[0][-1]['content'],
            answer,
        )
        # Every row carries its own leading pipe, so all rows are delimited
        # the same way and an empty input produces no stray '|' line.
        lines.append('| ' + ' | '.join(_cell(cell) for cell in cells) + ' |')
    return '\n'.join(lines) + '\n'

def dump_markdown(model_response, gpt4_response):
    """Build a Markdown table comparing a model's responses with GPT-4's.

    Args:
        model_response: Mapping from question text to the model's response.
            The displayed text is read from ``response[0][-1]['content']``.
        gpt4_response: Mapping with the same keys and value layout holding
            the GPT-4 responses.

    Returns:
        The table as a string: a header row, a separator row, then one row
        per question of ``model_response`` (in its iteration order). Every
        line, including the last, ends with a newline.

    Raises:
        KeyError: If a question of ``model_response`` is missing from
            ``gpt4_response``.
    """
    def _cell(value):
        # Make a value safe to embed in a single table cell: newlines would
        # end the row, pipes would split the cell, and backticks/asterisks
        # would be interpreted as Markdown markup.
        text = str(value)
        for old, new in (('\n', '<br />'), ('|', ''), ('```', '"""'), ('*', 'x')):
            text = text.replace(old, new)
        return text

    lines = [
        '| Question | Model Response | GPT-4 Response |',
        '|---------|-------------------|--------------------|',
    ]
    for question, model_turns in model_response.items():
        gpt4_turns = gpt4_response[question]
        cells = (
            question,
            model_turns[0][-1]['content'],
            gpt4_turns[0][-1]['content'],
        )
        # Every row carries its own leading pipe, so all rows are delimited
        # the same way and an empty input produces no stray '|' line.
        lines.append('| ' + ' | '.join(_cell(cell) for cell in cells) + ' |')
    return '\n'.join(lines) + '\n'

def dump_html(ft_json, orig_json, out_path, num_epochs = 6):
    """Render the response comparison as an HTML page and write it to disk.

    The comparison table is produced by ``dump_markdown`` and converted to
    HTML with the ``markdown`` package's tables extension.

    Args:
        ft_json: Responses passed to ``dump_markdown`` as the model column.
        orig_json: Responses passed to ``dump_markdown`` as the GPT-4 column.
        out_path: Path of the HTML file to write; an existing file is
            overwritten.
        num_epochs: Number of finetuning epochs shown in the page heading.

    Returns:
        The full HTML document that was written, as a string.
    """
    table = dump_markdown(ft_json, orig_json)
    parts = [
        '<!DOCTYPE html>\n<html>\n',
        # Declare the encoding the file is written in so browsers do not
        # have to guess when the responses contain non-ASCII text.
        '<head>\n<meta charset="utf-8">\n</head>\n',
        '<body>\n',
        f'<h2>Alpaca: Finetuned {num_epochs} epochs vs GPT-4</h2>\n',
        '<style>\n table, th, td {\n border: 1px solid black;\n}\n</style>\n',
        markdown.markdown(table, extensions=['markdown.extensions.tables']),
        '</body>\n</html>',
    ]
    html = ''.join(parts)
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # characters that appear in the responses.
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(html)
    return html

if __name__ == '__main__':
    # Load both response files with context managers so the handles are
    # closed promptly, and read them as UTF-8 regardless of platform default.
    with open('leaderboard_eval/ft-gpt3.5-v2-epochs6-5k_alpaca_eval.json', encoding='utf-8') as f:
        model_response = json.load(f)
    with open('leaderboard_eval/gpt4_alpaca_eval.json', encoding='utf-8') as f:
        gpt4_response = json.load(f)

    out_dir = 'html_visualizations/'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, 'outputs.html')
    dump_html(model_response, gpt4_response, out_path)