import numpy as np


def angle_normalize(x):
    """Wrap an angle (in radians) into the half-open interval [-pi, pi).

    Parameters
    ----------
    x : float or array
        Angle(s) in radians. Arrays are processed element-wise.

    Returns
    -------
    wrapped : float or array
        The equivalent angle(s) modulo 2*pi, shifted so that the result
        lies in [-pi, pi).
    """
    half_turn = np.pi
    full_turn = 2 * np.pi
    # Shift by half a turn so the modulo lands in [0, 2*pi), then shift
    # back to centre the interval on zero.
    shifted = x + half_turn
    return (shifted % full_turn) - half_turn

def reward_func(observations):
    """Compute the reward from the observations of the pendulum env.

    The observations are also the ones predicted by the model.
    For compatibility with other environments the actions should be appended to
    the inputs observations as the reward can be a function of both the actions
    and the observations.

    Parameters
    ----------
    observations : array-like, shape (n_samples, n_observations + n_actions)
        Observations and actions. The last features are the actions.
        Note that this is the action leading to the obtained observations.
        Feature 0 is interpreted as the cosine of the pendulum angle,
        feature 2 as ``thdot`` (presumably the angular velocity) and
        feature 3 as the action; feature 1 is not used.
        A single sample of shape (n_observations + n_actions,) is also
        accepted.

    Returns
    -------
    reward : ndarray, shape (n_samples,)
        Reward of each sample (negative cost). For a single 1-D sample the
        result is a scalar.
    """
    observations = np.asarray(observations)

    # Model-predicted cosines can fall slightly outside [-1, 1], where
    # arccos returns NaN; clipping keeps the reward finite and leaves
    # in-range values untouched.
    cos_th = np.clip(observations[..., 0], -1.0, 1.0)
    th = np.arccos(cos_th)
    thdot = observations[..., 2]
    u = observations[..., 3]

    # Quadratic cost on the angle, on thdot and on the action.
    costs = angle_normalize(th) ** 2 + .1 * thdot ** 2 + .001 * (u ** 2)

    return -costs
