"""
Min and Max score for each env for normalization when plotting.
Min score corresponds to random agent.
Max score corresponds to acceptable performance, for instance
human level performance in the case of Atari games.
"""
from typing import NamedTuple

import numpy as np


class ReferenceScore(NamedTuple):
    """
    Reference score bounds of one environment, used to normalize raw scores.

    ``min`` is the score that maps to 0 after normalization and ``max``
    the score that maps to 1.
    """

    # Environment id, e.g. "Hopper-v4"
    env_id: str
    # Lower reference score (per the module docstring: a random agent)
    min: float
    # Upper reference score (per the module docstring: acceptable performance)
    max: float


# Min is a random policy
# Max is the open-loop baseline (best score)
# reference_scores = [
#     # MuJoCo Envs
#     ReferenceScore("Ant-v4", -50, 2500),
#     ReferenceScore("HalfCheetah-v4", -250, 2500),
#     ReferenceScore("Hopper-v4", 20, 2000),
#     ReferenceScore("Swimmer-v4", 0, 340),
#     ReferenceScore("Walker2d-v4", 0, 2500),
# ]
# Active reference scores, one entry per environment: (env_id, min, max).
# These are the values the per-env lookup table below is built from.
reference_scores = [
    # MuJoCo Envs
    ReferenceScore("Ant-v4", -50, 1500),
    ReferenceScore("HalfCheetah-v4", -250, 2500),
    ReferenceScore("Hopper-v4", 20, 1500),
    ReferenceScore("Swimmer-v4", 0, 355),
    ReferenceScore("Walker2d-v4", 0, 2000),
]
# Alternative min: poorly optimized policy
# Ant-v4: -1700
# HalfCheetah-v4: 440
# Hopper-v4: 74
# Swimmer-v4: 40
# Walker2d-v4: 245

# Lookup table: environment id -> its ReferenceScore entry
min_max_score_per_env = {entry.env_id: entry for entry in reference_scores}


def normalize_score(score: np.ndarray, env_id: str) -> np.ndarray:
    """
    Linearly rescale a raw score using the reference bounds of an environment.

    The reference minimum maps to 0 and the reference maximum maps to 1.
    No clipping is applied, so scores outside the reference range
    yield values below 0 or above 1.

    :param score: unnormalized score
    :param env_id: environment id
    :return: normalized score
    :raises KeyError: if no reference score is registered for ``env_id``
    """
    bounds = min_max_score_per_env.get(env_id)
    # Guard clause: unknown environments cannot be normalized
    if bounds is None:
        raise KeyError(f"No reference score for {env_id}")

    lower, upper = bounds.min, bounds.max
    score_range = upper - lower
    return (score - lower) / score_range
