import numpy as np
from typing import List


def fsp(num_models: int, newest_prob: float = None) -> np.ndarray:
    """(Heuristic) fictitious self-play over a pool of opponent models.

    With ``newest_prob`` left as ``None`` (naive FSP) every model in the pool
    is sampled uniformly. Otherwise (heuristic FSP) the last entry of the
    distribution receives ``newest_prob`` and the remaining probability mass
    is split evenly among all other entries.

    Args:
        num_models: Size of the opponent pool; must be positive.
        newest_prob: Probability assigned to the last entry of the
            distribution, in ``[0, 1]``, or ``None`` for uniform sampling.
            Ignored when the pool holds a single model.

    Returns:
        Probability distribution over opponent models, as a 1-D array of
        length ``num_models``.

    Raises:
        AssertionError: If ``num_models`` is not positive or ``newest_prob``
            is given but lies outside ``[0, 1]``.
    """
    assert num_models > 0
    # Range-check only when a value was supplied: comparing None with a float
    # raises TypeError, which previously made the default (naive) path fail.
    assert newest_prob is None or 0.0 <= newest_prob <= 1.0

    if newest_prob is None:  # Naive FSP
        probs = [1.0 / num_models] * num_models
    elif num_models > 1:  # Heuristic FSP
        older_prob = (1.0 - newest_prob) / (num_models - 1)
        probs = [older_prob] * (num_models - 1) + [newest_prob]
    else:
        # A single model takes all the mass regardless of newest_prob.
        probs = [1.0]
    return np.array(probs)


def pfsp(win_rates: List[float], weighting="linear"):
    """Prioritized fictitious self-play

    Turn per-opponent win rates into a sampling distribution by applying a
    weighting function f to every win rate p and normalising:

    P[B] = f(p[A beats B]) / sum_C(f(p[A beats C]))

    Args:
        win_rates: win rate against each opponent model, each in [0, 1]
        weighting: name of the weighting function f, one of:
            alp: f(p) = max(0.001, p)
            variance: f(p) = p * (1 - p), largest around a 50% win rate
            linear: f(p) = 1 - p, larger for harder opponents
            linear_capped: f(p) = max(0.1, 1 - p)
            squared: f(p) = (1 - p) ** 2

    Returns:
        Probability distribution over opponent models. When the weights sum
        to less than 1e-10 a uniform distribution is returned instead.
    """
    for rate in win_rates:
        assert 0.0 <= rate <= 1.0

    def _alp(p):
        return np.maximum(0.001, p)

    def _variance(p):
        return p * (1 - p)

    def _linear(p):
        return 1 - p

    def _linear_capped(p):
        return np.maximum(0.1, 1 - p)

    def _squared(p):
        return (1 - p) ** 2

    # An unknown name surfaces as the KeyError from this lookup.
    weigh = {
        "alp": _alp,
        "variance": _variance,
        "linear": _linear,
        "linear_capped": _linear_capped,
        "squared": _squared,
    }[weighting]

    weights = weigh(np.asarray(win_rates))
    total = weights.sum()
    if total < 1e-10:
        # Every weight is (numerically) zero: fall back to uniform sampling.
        pool_size = len(win_rates)
        return np.ones_like(win_rates) / pool_size
    return weights / total
