from environments.kitchen import reward_utils
from environments.kitchen.kitchen_base import KitchenSingleTaskEnv


class KitchenMicrowaveCloseEnvV0(KitchenSingleTaskEnv):
    """Single-task kitchen environment registered as ``"microwave-close"``."""

    TASK_NAME = "microwave-close"
    # Goal distance below which the reward is overridden with the full bonus.
    BONUS_THRESH = 0.1

    @classmethod
    def _compute_reward(cls, obs_dict, dists):
        """Compute the shaped reward for the current step.

        Args:
            obs_dict: Not read by this implementation.
            dists: Mapping that must provide the keys ``"goal"``,
                ``"goal_init"``, ``"hand"`` and ``"hand_init"``.
                NOTE(review): presumably current/initial distances to the
                goal and from the hand to the handle -- confirm against the
                base class that builds this mapping.

        Returns:
            ``1.`` when ``dists["goal"]`` is strictly below ``BONUS_THRESH``;
            otherwise the Hamacher product of the reach and in-place terms.
        """
        goal_dist = dists["goal"]
        goal_thresh = cls.BONUS_THRESH
        # Margin spans the gap between the initial goal value and the bonus
        # threshold, so the shaping is scaled by the starting configuration.
        goal_margin = abs(dists["goal_init"] - goal_thresh)
        goal_term = reward_utils.tolerance(
            goal_dist,
            bounds=(0, goal_thresh),
            margin=goal_margin,
            sigmoid="long_tail",
        )

        reach_radius = 0.07
        hand_margin = abs(dists["hand_init"] - reach_radius)
        hand_term = reward_utils.tolerance(
            dists["hand"],
            bounds=(0, reach_radius),
            margin=hand_margin,
            sigmoid="gaussian",
        )

        # Always evaluate the combined term, then let the bonus override it.
        shaped = reward_utils.hamacher_product(hand_term, goal_term)
        return 1. if goal_dist < goal_thresh else shaped
