import numpy as np
# Seed NumPy's global (legacy) random number generator with a fixed value.
# NOTE: this runs as a side effect whenever this module is imported.
np.random.seed(21)

class CF_policy():
    """Hand-written threshold policy built on top of an environment.

    The policy starts from the lower bound of the first two action
    dimensions (``env.a_space['low'][0]`` and ``env.a_space['low'][1]``)
    and overrides each of them when a rule on the unscaled state fires:

    * if ``x[1] < h2_threshold`` -> ``v1 = v1_override``
    * if ``x[2] > h3_threshold`` -> ``v2 = v2_override``

    The default thresholds and override values reproduce the rule that was
    previously hard-coded (h2 < 0.4 -> v1 = 2.8; h3 > 0.6 -> v2 = 1.8).
    """

    def __init__(self, env, original_policy,
                 h2_threshold=0.4, h3_threshold=0.6,
                 v1_override=2.8, v2_override=1.8):
        """Store the environment, the wrapped policy and the rule settings.

        Args:
            env: Environment object. ``predict`` reads ``env.a_space['low']``
                and calls ``env._unscale_X`` and ``env._scale_U``.
            original_policy: Stored on the instance; not used by any method
                of this class.
            h2_threshold: ``v1`` is overridden when the unscaled ``x[1]`` is
                strictly below this value.
            h3_threshold: ``v2`` is overridden when the unscaled ``x[2]`` is
                strictly above this value.
            v1_override: Value assigned to ``v1`` when the h2 rule fires.
            v2_override: Value assigned to ``v2`` when the h3 rule fires.
        """
        self.env = env
        self.original_policy = original_policy
        self.h2_threshold = h2_threshold
        self.h3_threshold = h3_threshold
        self.v1_override = v1_override
        self.v2_override = v2_override

    def predict(self, state, deterministic=True):
        """Return the rule-based action for ``state``.

        Args:
            state: Scaled state (per the original comment); it is passed to
                ``env._unscale_X`` before the thresholds are checked.
            deterministic: Accepted but ignored; the rule never samples.
                Presumably kept for signature compatibility with other
                policies -- confirm against callers.

        Returns:
            The result of ``env._scale_U(np.array([v1, v2]))``. The original
            comment says this scales/normalizes the action to [-1, 1];
            that depends on the environment implementation.
        """
        # Thresholds are expressed in unscaled units, so unscale first.
        x = self.env._unscale_X(state)

        # Fallback for each action component: the minimum allowed value.
        low = self.env.a_space['low']

        # h2 = x[1], h3 = x[2] for the unscaled state.
        v1 = self.v1_override if x[1] < self.h2_threshold else low[0]
        v2 = self.v2_override if x[2] > self.h3_threshold else low[1]

        # Hand the raw action back to the environment for scaling.
        return self.env._scale_U(np.array([v1, v2]))