import numpy as np
from itertools import combinations
import re
from collections import Counter
from scipy import stats

def tokenize(text):
    """Lower-case ``text`` and return its word tokens in order of appearance.

    A token is a maximal run of ``\\w`` characters (letters, digits and
    underscore); punctuation and whitespace act as separators.
    """
    word_pattern = re.compile(r'\b\w+\b')
    lowered = text.lower()
    return word_pattern.findall(lowered)

def kendalls_tau(a, b):
    """Return the Kendall's tau correlation SciPy reports for ``a`` and ``b``.

    Only the correlation value is returned; the p-value computed by
    ``scipy.stats.kendalltau`` is discarded.
    """
    return stats.kendalltau(a, b).correlation

def weighted_kendalls_tau(a, b):
    """Return the weighted tau correlation SciPy reports for ``a`` and ``b``.

    ``scipy.stats.weightedtau`` is called with its default arguments and only
    the correlation value of its result is returned.
    """
    return stats.weightedtau(a, b).correlation


def fidelity_and_wfidelity(
    query, documents, model, original_ranking, token_attributions):
    """
    Compute Fidelity and Weighted Fidelity for feature attributions.

    Every document is re-scored as the sum of the attributions of its tokens
    (a token occurring k times contributes k times its attribution), the
    documents are re-ranked by that score, and the resulting ranking is
    compared with ``original_ranking``.

    Parameters:
        query:
            Not used by this function; kept for interface compatibility.
        documents: List[str]
            Documents used for ranking.
        model:
            Not used by this function; kept for interface compatibility.
        original_ranking: Sequence[int]
            Indices into ``documents`` ordered from best to worst, i.e. the
            same form as ``np.argsort`` of the negated scores. Only the
            documents listed here take part in the comparison.
        token_attributions: Dict[str, float]
            Feature attributions for each token. Tokens missing from the
            mapping contribute 0.
    Returns:
        fidelity: float
            Kendall's Tau between the two rankings.
        wfidelity: float
            Weighted Kendall's Tau between the two rankings.
    """
    # Reconstruct scores using token-level attributions
    reconstructed_scores = []
    for doc in documents:
        token_counts = Counter(tokenize(doc))
        reconstructed_scores.append(
            sum(count * token_attributions.get(token, 0.0)
                for token, count in token_counts.items())
        )

    # Reconstruct ranking from scores. A stable sort keeps tied documents in
    # their input order, so the result is deterministic.
    reconstructed_ranking = np.argsort(
        -np.array(reconstructed_scores), kind="stable")
    # Debug output preserved from the original implementation.
    print('rankings: ',original_ranking,reconstructed_ranking)

    # Kendall's tau must compare, per document, the rank it receives in each
    # ranking. Feeding the two orderings (lists of document indices) directly
    # to the statistic compares index values instead, which makes the result
    # depend on how documents happen to be numbered.
    original_order = np.asarray(original_ranking, dtype=int).ravel()
    n_docs = len(reconstructed_ranking)
    reconstructed_position = np.empty(n_docs, dtype=int)
    reconstructed_position[reconstructed_ranking] = np.arange(n_docs)

    # Visit documents in their original order: the i-th visited document has
    # original rank i and reconstructed rank reconstructed_position[doc].
    # Ranks are negated so that larger values mean "more important", which is
    # what scipy.stats.weightedtau assumes when weighting exchanges; plain
    # Kendall's tau is unaffected by negating both inputs.
    original_relevance = -np.arange(len(original_order), dtype=float)
    reconstructed_relevance = -reconstructed_position[original_order].astype(float)

    # Compute Fidelity (Kendall's Tau)
    fidelity = kendalls_tau(original_relevance, reconstructed_relevance)

    # Compute Weighted Fidelity (Weighted Kendall's Tau)
    wfidelity = weighted_kendalls_tau(original_relevance, reconstructed_relevance)

    return fidelity, wfidelity


def mean_average_precision(y_true, y_pred):
    """
    Average the precision values observed at every relevant position of a
    single ranked list.

    Documents are ordered by descending predicted score; for each position
    holding a document whose relevance equals 1, the precision at that
    position is recorded, and the mean of those precisions is returned.

    Parameters:
        y_true (List[int]): Binary relevance values for the documents
                            (e.g., [1, 0, 1] where 1 is relevant, 0 is not).
        y_pred (List[float]): Predicted scores for the documents.

    Returns:
        float: Mean of the recorded precisions, or 0.0 when no document has
               relevance 1.
    """
    relevance = np.asarray(y_true).flatten()
    scores = np.asarray(y_pred).flatten()

    # Highest predicted score first.
    order = np.argsort(-scores)

    # Zero-based ranks at which a relevant document is found.
    hit_positions = np.flatnonzero(relevance[order] == 1)
    if hit_positions.size == 0:
        return 0.0

    # The k-th relevant document (1-based) found at 1-based rank r gives
    # precision k / r.
    hits_so_far = np.arange(1, hit_positions.size + 1)
    precision_at_hits = hits_so_far / (hit_positions + 1)
    return np.mean(precision_at_hits)


def mean_average_precision_graded(y_true, y_pred):
    """
    Relevance-weighted average precision for a single ranked list with
    graded relevance.

    Documents are ordered by descending predicted score. At each rank the
    cumulative relevance divided by the rank is multiplied by that
    document's own relevance; the sum of these products is divided by the
    total relevance of the list.

    Parameters:
        y_true (List[float]): Relevance values (graded, e.g., [3, 0, 2, 1]).
        y_pred (List[float]): Predicted scores for the documents.

    Returns:
        float: The normalized weighted precision, or 0.0 when the total
               relevance is not positive.
    """
    relevance = np.asarray(y_true).flatten()
    scores = np.asarray(y_pred).flatten()

    # Relevance values re-ordered so the highest predicted score comes first.
    ranked_relevance = relevance[np.argsort(-scores)]

    # Normalization constant: total relevance available in the list.
    total_gain = np.sum(relevance)

    contributions = []
    running_gain = 0
    for position, gain in enumerate(ranked_relevance):
        running_gain = running_gain + gain
        graded_precision = running_gain / (position + 1)
        contributions.append(graded_precision * gain)

    if total_gain > 0:
        return np.sum(contributions) / total_gain
    return 0.0



def generate_original_ordering(query, documents, model, predictive_model):
    """
    Rank documents by the scores the predictive model assigns to them.

    Parameters:
        query: Query forwarded unchanged to ``predictive_model``.
        documents (List[str]): List of documents to be ranked.
        model: Model object forwarded unchanged to ``predictive_model``.
        predictive_model (Callable): Called as
            ``predictive_model(model, query, documents)``; its result is
            converted to a NumPy array of scores.

    Returns:
        numpy.ndarray: Indices of documents in descending order of their
        scores.
    """
    document_scores = np.array(predictive_model(model, query, documents))

    # Negating the scores turns argsort's ascending order into descending.
    return np.argsort(-document_scores)

