import json
import numpy as np
import argparse


# Command-line options. All four values are interpolated into the input
# file path built further down in this script.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--sd",
    type=str,
    default="ngram",
    help="The speculative decoding method.",
)
parser.add_argument(
    "--temperature",
    type=float,
    default=0.3,
    help="The temperature.",
)
parser.add_argument(
    "--in_data",
    type=str,
    default='EK1',
    help="The input dataset.",
)
parser.add_argument(
    "--trial",
    type=int,
    default=30,
    help="Number of iterations.",
)
args = parser.parse_args()


# Path of the JSON input, assembled from the command-line options.
filename = f"{args.sd}_output/{args.sd}_{args.in_data}_{args.temperature}_{args.trial}.json"

# `num` is the stride and `offset` the starting index used below when
# picking entries out of datas["traces"].
num = args.trial
offset = 0


# JSON text is UTF-8 by specification, so read it as UTF-8 explicitly rather
# than relying on the platform's locale encoding.
with open(filename, "r", encoding="utf-8") as f:
    datas = json.load(f)


def pad(lst: list, pad_size: int, padding: int = 0) -> list:
    """Return a copy of *lst* forced to *pad_size* elements.

    Lists shorter than *pad_size* are extended on the right with *padding*;
    lists of equal or greater length are cut down with ``lst[:pad_size]``.
    The input list is never modified.
    """
    shortfall = pad_size - len(lst)
    if shortfall <= 0:
        # Already long enough: keep only the leading slice.
        return lst[:pad_size]
    return lst + [padding] * shortfall

# Every selected trace is padded with zeros or truncated to this many entries.
to_pad = 10
# Take 50 traces spaced `num` apart, starting at `offset`.
lst1 = [
    pad(datas["traces"][row * num + offset], to_pad)
    for row in range(50)
]
# Same selection, shifted two positions further into datas["traces"].
lst2 = [
    pad(datas["traces"][row * num + offset + 2], to_pad)
    for row in range(50)
]


def pairwise_cosine_similarity(list1, list2=None):
    """Compute the cosine similarity between every row of two vector sets.

    Args:
        list1: 2-D sequence (list of equal-length lists or array) of vectors.
        list2: Optional second 2-D sequence with the same vector length as
            ``list1``. When omitted, ``list1`` is compared against itself.

    Returns:
        A NumPy array ``m`` of shape ``(len(list1), len(list2))`` where
        ``m[i, j]`` is the cosine similarity of ``list1[i]`` and ``list2[j]``.
        A zero-length vector (all zeros) has no direction; its similarity to
        every other vector is reported as ``0.0`` rather than NaN.
    """

    def _unit_rows(rows):
        # Scale each row to unit length. Zero-norm rows are divided by 1
        # instead of 0 so they stay all-zero and do not produce NaN, which
        # would otherwise be picked up by a later argmax as the "maximum".
        norms = np.linalg.norm(rows, axis=1, keepdims=True)
        norms[norms == 0] = 1.0
        return rows / norms

    array1 = np.asarray(list1, dtype=float)
    # Without a second set, compare the first set against itself.
    array2 = array1 if list2 is None else np.asarray(list2, dtype=float)

    # The dot product of unit vectors is the cosine of the angle between them.
    return np.dot(_unit_rows(array1), _unit_rows(array2).T)


# Similarity of every lst2 vector (matrix rows) against every lst1 vector
# (matrix columns).
full_matrix = pairwise_cosine_similarity(lst2, lst1)

# Persist only the upper triangle (entries below the diagonal are written
# as zero), three decimals per value, comma-separated.
np.savetxt(
    f"matrix_{args.sd}_{args.in_data}_{args.temperature}_{args.trial}.txt",
    np.triu(full_matrix),
    fmt="%.3f",
    delimiter=",",
)

# Column index of the best-scoring entry in each row.
max_indices = full_matrix.argmax(axis=1)
print("Row-wise indices of the maximum cosine similarity:", max_indices.tolist())

# Count the rows whose best match sits on the diagonal, i.e. row i peaks at
# column i, and report that as a share of all rows.
diagonal_matches = (max_indices == np.arange(len(full_matrix))).sum()
percentage_diagonal = (diagonal_matches / len(full_matrix)) * 100
print(f"Percentage of rows where self-comparison is the highest: {percentage_diagonal:.2f}%")
