import os
import pickle
import torch
import numpy as np
import pandas as pd
import argparse
import matplotlib.pyplot as plt
from collections import defaultdict
from tqdm.auto import tqdm
import scipy.stats
from sklearn.metrics import pairwise_distances

def read_vtop_files(dirpath):
    """Collect the visual-topic metrics stored in the files of ``dirpath``.

    Every directory entry whose name contains ".csv" but not "text" is
    read, in alphabetical order of file name. Values are taken by line
    position; the first four lines of a file are expected to look like::

        diversity: 0.7615999999999999
        relatedness: 0.15278193251612918
        diversity baseline: 0.564488, 0.017986268540194774
        relatedness baseline: -0.29485381216279716, 0.005610484314955831

    On the two baseline lines the first number is stored as the baseline
    value and the second as its standard deviation.

    Returns:
        A 6-tuple of lists holding one float per file, in this order:
        diversities, relatednesses, diversity baselines, diversity
        baseline stds, relatedness baselines, relatedness baseline stds.

    Raises:
        IndexError: if a file has fewer than four lines, a line has no
            ":", or a baseline line has no ",".
        ValueError: if a value cannot be converted to float.
    """

    def after_colon(line):
        # Text between the first and the second ":" of the line.
        return line.split(":")[1]

    def scalar(line):
        return float(after_colon(line).strip())

    def pair(line):
        fields = after_colon(line).split(",")
        return float(fields[0].strip()), float(fields[1].strip())

    # One output list per metric, in the order documented above.
    columns = tuple([] for _ in range(6))

    # os.listdir gives no ordering guarantee, so sort for a stable result.
    names = sorted(
        entry
        for entry in os.listdir(dirpath)
        if ".csv" in entry and "text" not in entry
    )

    for name in names:
        with open(os.path.join(dirpath, name), 'r') as handle:
            lines = handle.readlines()

        diversity = scalar(lines[0])
        relatedness = scalar(lines[1])
        diversity_baseline, diversity_baseline_std = pair(lines[2])
        relatedness_baseline, relatedness_baseline_std = pair(lines[3])

        record = (
            diversity,
            relatedness,
            diversity_baseline,
            diversity_baseline_std,
            relatedness_baseline,
            relatedness_baseline_std,
        )
        for column, value in zip(columns, record):
            column.append(value)

    return columns


def read_text_files(dirpath):
    """Collect the text-topic metrics stored in the files of ``dirpath``.

    Every directory entry whose name contains both "text" and ".csv" is
    read, in alphabetical order of file name. Values are taken by line
    position; the first three lines of a file are expected to look like::

        diversity: 0.13803062493062493
        relatedness: 0.22305367265046275
        coherence: 0.2388384201197578

    Returns:
        A 3-tuple of lists (diversities, relatednesses, coherences), each
        holding one float per file.

    Raises:
        IndexError: if a file has fewer than three lines or a line has
            no ":".
        ValueError: if a value cannot be converted to float.
    """
    diversities, relatednesses, coherences = [], [], []

    # Sort so results do not depend on the order os.listdir happens to use.
    for name in sorted(os.listdir(dirpath)):
        if "text" not in name or ".csv" not in name:
            continue

        with open(os.path.join(dirpath, name), 'r') as handle:
            lines = handle.readlines()

        # Index explicitly (rather than slicing) so a short file still
        # fails with IndexError instead of being silently truncated.
        diversity, relatedness, coherence = [
            float(lines[position].split(":")[1].strip())
            for position in range(3)
        ]

        diversities.append(diversity)
        relatednesses.append(relatedness)
        coherences.append(coherence)

    return diversities, relatednesses, coherences


# Load the visual-topic and text-topic metrics from the "metrics/" directory.
(
    diversities,
    relatednesses,
    diversity_baselines,
    diversity_baselines_std,
    relatedness_baselines,
    relatedness_baselines_std,
) = read_vtop_files("metrics/")
text_diversities, text_relatednesses, text_coherences = read_text_files("metrics/")


def _latex_row(label, values):
    """Build one LaTeX table row: a bold label, then each value rounded to 2 decimals.

    Cells are separated by " & " and the row ends with " \\\\" (a space
    followed by the LaTeX line break).
    """
    cells = ["\\textbf{" + label + "}"]
    cells.extend(str(round(value, 2)) for value in values)
    return " & ".join(cells) + " \\\\"


print("------------------- Diversity -------------------")
diversities_row = _latex_row("Visual Topics", diversities)
print(diversities_row)

diversities_text_row = _latex_row("Text Topics", text_diversities)
print(diversities_text_row)

# Only the baseline means are reported; the std lists are loaded but not printed.
diversities_baseline_row = _latex_row("Visual Topics Baseline", diversity_baselines)
print(diversities_baseline_row)


print("\n\n------------------- Relatedness -------------------")
relatednesses_row = _latex_row("Visual Topics", relatednesses)
print(relatednesses_row)

relatednesses_text_row = _latex_row("Text Topics", text_relatednesses)
print(relatednesses_text_row)

relatednesses_baseline_row = _latex_row("Visual Topics Baseline", relatedness_baselines)
print(relatednesses_baseline_row)


print("\n\n------------------- Coherence -------------------")
coherences_row = _latex_row("Text Topics", text_coherences)
print(coherences_row)


