# Caveat: gzip compression could be low for the wrong reasons. For example,
# the model might just produce the same kind of text over and over without
# that text being really human-like.

import os
import json
from typing import Union, List
import gzip
import io
import pandas as pd

from genpaths import *

from tqdm import tqdm

def load(path_list, idx):
    """Read one JSON-lines file and return the values of its text column.

    Args:
        path_list: Sequence whose element at ``idx`` is the path of a
            JSON-lines file and whose last element is the name of the column
            to extract from it.
        idx: Position in ``path_list`` of the file to read.

    Returns:
        The column's values as a list. If the first value is itself a list,
        every value is replaced by its first element.
    """
    frame = pd.read_json(path_list[idx], lines=True)
    column = path_list[-1]
    values = frame[column].tolist()

    # Only the first row is inspected to decide whether rows are nested lists.
    if not isinstance(values[0], list):
        return values
    return [row[0] for row in values]

def calculate_gzipability(text: list[str]) -> float:
    """Return the mean gzip-compressed size, in bytes, of the given texts.

    Each string is UTF-8 encoded and compressed on its own with
    ``gzip.compress``; the compressed lengths are then averaged. The result is
    an absolute size: it is not divided by the uncompressed length.

    Args:
        text: Strings to measure.

    Returns:
        The average compressed length in bytes, or ``nan`` when ``text`` is
        empty (the mean of no values is undefined).
    """
    count = len(text)
    if count == 0:
        # Previously this fell through to a bare ZeroDivisionError; report the
        # undefined mean explicitly instead.
        return float("nan")

    total = sum(len(gzip.compress(t.encode("utf-8"))) for t in tqdm(text))
    return total / count

def main():
    """Compute the gzip score of every dataset and print it as a LaTeX table.

    For each of HUMAN, MACHINE, LLMOPT, PROMPTING, PARAPHRASING, TINYSTYLER
    and OURS (presumably provided by ``from genpaths import *``), the file at
    index 0 is loaded with ``load`` and scored with ``calculate_gzipability``.
    The scores are then written to stdout as a two-column LaTeX table.

    Returns:
        Always ``0``.
    """
    # Order in which the datasets are loaded and scored.
    datasets = (
        ("Human", HUMAN),
        ("Machine", MACHINE),
        ("LLMOPT", LLMOPT),
        ("Prompting", PROMPTING),
        ("Paraphrasing", PARAPHRASING),
        ("TinyStyler", TINYSTYLER),
        ("Ours", OURS),
    )
    scores = {}
    for label, paths in datasets:
        scores[label] = calculate_gzipability(load(paths, 0))

    # Order of the table rows; note that it differs from the scoring order
    # above ("Paraphrasing" is listed before "Prompting").
    row_order = (
        "Human",
        "Machine",
        "LLMOPT",
        "Paraphrasing",
        "Prompting",
        "TinyStyler",
        "Ours",
    )

    # Assemble the LaTeX table line by line, then emit it in one go.
    lines = [
        "\\begin{table}[h]",
        "\\centering",
        "\\begin{tabular}{|l|l|}",
        "\\hline",
        "\\textbf{Method} & \\textbf{Gzipability} \\\\",
        "\\hline",
    ]
    lines.extend(f"{label} & {scores[label]:.2f} \\\\" for label in row_order)
    lines += [
        "\\hline",
        "\\end{tabular}",
        "\\caption{Gzipability of different methods.}",
        "\\label{tab:gzipability}",
        "\\end{table}",
    ]
    print("\n".join(lines))

    return 0

if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status
    # instead of discarding it.
    raise SystemExit(main())