from environments.kitchen import reward_utils
from environments.kitchen.kitchen_base import KitchenSingleTaskEnv
from environments.kitchen.v0.kitchen_tasks import KitchenInfosBase

class KitchenBottomBurnerOffEnvV0(KitchenInfosBase):
    """Reward and success definition for the "bottom burner-off" kitchen task.

    The reward is the Hamacher product of two shaped terms, both produced by
    ``reward_utils.tolerance``: one driven by ``dists["goal"]`` and one driven
    by ``dists["hand"]``.
    """

    TASK_NAME = "bottom burner-off"
    # Upper bound of the full-score band for dists["goal"]; the same value is
    # used as the strict cutoff for declaring success.
    BONUS_THRESH = 0.3

    @classmethod
    def _compute_reward(cls, obs_dict, dists):
        """Compute the shaped reward and the success flag.

        Args:
            obs_dict: Not used by this implementation.
            dists: Mapping that must provide the keys ``"goal"``,
                ``"goal_init"``, ``"hand"`` and ``"hand_init"``.
                NOTE(review): presumably current and initial distances to the
                goal and to the handle -- confirm against the caller.

        Returns:
            A ``(reward, success)`` tuple. ``reward`` is
            ``reward_utils.hamacher_product(reach, in_place)`` and ``success``
            is ``True`` exactly when ``dists["goal"] < BONUS_THRESH``.
        """
        thresh = cls.BONUS_THRESH

        # Goal term: bounds [0, BONUS_THRESH]; the margin is the gap between
        # the initial goal value and the threshold.
        goal_value = dists["goal"]
        goal_margin = abs(dists["goal_init"] - thresh)
        goal_term = reward_utils.tolerance(
            goal_value,
            bounds=(0, thresh),
            margin=goal_margin,
            sigmoid="long_tail",
        )

        # Hand term: same construction with a fixed 0.08 radius and a
        # gaussian falloff.
        reach_radius = 0.08
        hand_margin = abs(dists["hand_init"] - reach_radius)
        hand_term = reward_utils.tolerance(
            dists["hand"],
            bounds=(0, reach_radius),
            margin=hand_margin,
            sigmoid="gaussian",
        )

        # NOTE: an earlier, now-disabled variant gated the hand term by
        # (1 - dists["gripper"]) and mixed the terms as 0.6 * goal + 0.4 * hand.
        # The active formulation combines them with a Hamacher product.
        reward = reward_utils.hamacher_product(hand_term, goal_term)

        success = bool(goal_value < thresh)
        return reward, success
