from environments.kitchen import reward_utils
from environments.kitchen.kitchen_base import KitchenSingleTaskEnv
from environments.kitchen.v0.kitchen_tasks import KitchenInfosBase


class KitchenTopBurnerOffEnvV0(KitchenInfosBase):
    """Reward and success definition for the "top burner-off" kitchen task."""

    TASK_NAME = "top burner-off"
    # Goal distance below which the episode counts as a success; also used
    # as the upper bound of the goal term's tolerance band.
    BONUS_THRESH = 0.3

    @classmethod
    def _compute_reward(cls, obs_dict, dists):
        """Combine a goal-distance term and a hand-reach term into a reward.

        Args:
            obs_dict: Not used by this implementation.
            dists: Mapping that provides the entries "goal", "goal_init",
                "hand" and "hand_init". Presumably current and initial
                distances to the goal and to the handle -- TODO confirm
                against the caller that builds this mapping.

        Returns:
            A ``(reward, success)`` tuple. ``reward`` is the result of
            ``reward_utils.hamacher_product`` applied to the two tolerance
            terms; ``success`` is ``True`` when ``dists["goal"]`` is strictly
            below ``BONUS_THRESH``.
        """
        thresh = cls.BONUS_THRESH

        # Goal term: bounds are [0, BONUS_THRESH]; the margin is the gap
        # between the initial goal distance and the threshold.
        # NOTE(review): per the original author's remark this should lie in
        # [0, 1] and be 1 inside the bounds -- confirm against
        # reward_utils.tolerance.
        goal_dist = dists["goal"]
        goal_margin = abs(dists["goal_init"] - thresh)
        in_place = reward_utils.tolerance(
            goal_dist,
            bounds=(0, thresh),
            margin=goal_margin,
            sigmoid="long_tail",
        )

        # Reach term: same construction for the hand distance, using a fixed
        # radius and a gaussian falloff.
        reach_radius = 0.08
        hand_dist = dists["hand"]
        hand_margin = abs(dists["hand_init"] - reach_radius)
        reach = reward_utils.tolerance(
            hand_dist,
            bounds=(0, reach_radius),
            margin=hand_margin,
            sigmoid="gaussian",
        )

        # Historical note: an earlier version used the weighted sum
        # 0.6 * in_place + 0.4 * reach instead of the Hamacher product.
        reward = reward_utils.hamacher_product(reach, in_place)

        success = bool(goal_dist < thresh)
        return reward, success
