from environments.kitchen import reward_utils
from environments.kitchen.kitchen_base import KitchenSingleTaskEnv
from environments.kitchen.v0.kitchen_tasks import KitchenInfosBase


class KitchenMicrowaveCloseEnvV0(KitchenInfosBase):
    """Reward and success definition for the ``microwave-close`` task."""

    TASK_NAME = "microwave-close"
    # Used twice below: as the upper bound of the goal term's `bounds`
    # interval and as the strict cut-off for declaring success.
    BONUS_THRESH = 0.1

    @classmethod
    def _compute_reward(cls, obs_dict, dists):
        """Combine a hand term and a goal term into one reward.

        Args:
            obs_dict: Not read by this implementation.
            dists: Mapping providing the entries ``"goal"``, ``"goal_init"``,
                ``"hand"`` and ``"hand_init"``.
                NOTE(review): presumably current/initial distances to the
                goal and to the handle -- confirm against the caller.

        Returns:
            A ``(reward, success)`` tuple. ``reward`` is the result of
            ``reward_utils.hamacher_product`` applied to the hand term and
            the goal term; ``success`` is ``True`` exactly when
            ``dists["goal"]`` is strictly below ``BONUS_THRESH``.
        """
        # Goal term: bounds run from 0 to the bonus threshold; the margin is
        # the gap between the initial goal value and that threshold.
        goal_margin = abs(dists["goal_init"] - cls.BONUS_THRESH)
        goal_term = reward_utils.tolerance(
            dists["goal"],
            bounds=(0, cls.BONUS_THRESH),
            margin=goal_margin,
            sigmoid="long_tail",
        )

        # Hand term: same construction, with its own radius and sigmoid.
        reach_radius = 0.07
        hand_margin = abs(dists["hand_init"] - reach_radius)
        hand_term = reward_utils.tolerance(
            dists["hand"],
            bounds=(0, reach_radius),
            margin=hand_margin,
            sigmoid="gaussian",
        )

        combined = reward_utils.hamacher_product(hand_term, goal_term)
        # bool(...) keeps the flag a plain Python bool whatever type the
        # comparison itself yields.
        solved = bool(dists["goal"] < cls.BONUS_THRESH)
        return combined, solved
