import json, argparse
import math
from typing import List

def calculate_PCC(traces: List[int], tokens: List[int]) -> float:
    """
    Calculate the Pearson Correlation Coefficient between two lists of equal length.

    Args:
        traces (List[int]): First list of numeric values.
        tokens (List[int]): Second list of numeric values.

    Returns:
        float: Pearson correlation coefficient, clamped to [-1.0, 1.0].

    Raises:
        ValueError: If the input lists have different lengths, are empty,
            or at least one of them has zero variance.
    """
    if len(traces) != len(tokens):
        raise ValueError("Lists must have the same length")
    n = len(traces)
    if n == 0:
        raise ValueError("Lists must not be empty")

    # Compute means
    mean_traces = sum(traces) / n
    mean_tokens = sum(tokens) / n

    # Accumulate the (unnormalised) covariance and variance sums; the 1/n
    # factors cancel in the final ratio, so they are never applied.
    cov = 0.0
    var_traces = 0.0
    var_tokens = 0.0
    for x, y in zip(traces, tokens):
        dx = x - mean_traces
        dy = y - mean_tokens
        cov += dx * dy
        var_traces += dx * dx
        var_tokens += dy * dy

    # Prevent division by zero
    if var_traces == 0 or var_tokens == 0:
        raise ValueError("At least one of the lists has zero variance")

    # The product of the two variance sums can overflow to inf (huge inputs)
    # or underflow to 0.0 (tiny inputs) even though each sum is representable.
    # Keep the product form when it is usable (it is the most accurate), and
    # otherwise take the square roots separately.
    denom = math.sqrt(var_traces * var_tokens)
    if denom == 0.0 or math.isinf(denom):
        denom = math.sqrt(var_traces) * math.sqrt(var_tokens)

    pcc = cov / denom

    # Floating-point rounding can push the ratio marginally outside [-1, 1].
    # Explicit comparisons (rather than min/max) let a NaN pass through
    # unchanged instead of being silently turned into a valid-looking value.
    if pcc > 1.0:
        pcc = 1.0
    elif pcc < -1.0:
        pcc = -1.0
    return pcc


if __name__ == "__main__":
    # Script entry point: load one JSON result file selected by the CLI
    # options and print the Pearson correlation between its "traces" and
    # "tokens" values.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--sd",
        type=str,
        default="EAGLE",
        help="The speculative decoding.",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.3,
        help="The temperature.",
    )
    parser.add_argument(
        "--in_data",
        type=str,
        default='EK1',
        help="The input dataset.",
    )
    parser.add_argument(
        "--trial",
        type=int,
        default=5,
        help="Number of iterations.",
    )
    args = parser.parse_args()

    # The result file path is derived entirely from the CLI options.
    data_filename = f"./non-deepmind/{args.sd}_output/{args.sd}_{args.in_data}_{args.temperature}_{args.trial}.json"

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(data_filename, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Missing keys are treated as "no data"; calculate_PCC then reports the
    # empty input as a ValueError, which is handled below.
    raw_traces = data.get("traces", [])
    raw_tokens = data.get("tokens", [])

    # Flatten the nested lists so that a single coefficient is computed over
    # all samples pooled together, rather than one coefficient per sub-list.
    flat_traces = [x for sublist in raw_traces for x in sublist]
    flat_tokens = [y for sublist in raw_tokens for y in sublist]

    try:
        pcc = calculate_PCC(flat_traces, flat_tokens)
    except ValueError as e:
        # Length mismatch, empty data, or zero variance: report, don't crash.
        print(f"Error calculating PCC: {e}")
    else:
        print("Pearson Correlation Coefficient (flattened):", pcc)



