# run_skeleton_task.py

import time
import numpy as np
from math import atan2, asin

# NOTE: Do **not** remove these – the framework depends on them.
from env import setup_environment, shutdown_environment
from skill_code import pick, place, move, rotate, pull
from video import init_video_writers, recording_step, recording_get_observation
from object_positions import get_object_positions


# --------------------------------------------------------------------------- #
# Helper-Utilities                                                            #
# --------------------------------------------------------------------------- #
def normalize_quaternion(q):
    """
    Return *q* scaled to unit Euclidean norm.

    The input is converted to a float NumPy array first, so lists and tuples
    are accepted.  (The original note on this helper says it is the utility
    expected by the rotate() skill – that skill is not visible from this
    file, so treat that as an assumption.)

    Parameters
    ----------
    q : array-like
        Quaternion components.  The component order is irrelevant for
        normalisation.

    Returns
    -------
    numpy.ndarray
        ``q / ||q||`` as a float array.

    Raises
    ------
    ValueError
        If the norm of *q* is zero or not finite.  Dividing in that case
        would silently produce NaN / meaningless components that could be
        passed on as a target orientation.
    """
    q = np.asarray(q, dtype=float)
    norm = np.linalg.norm(q)
    # A zero, NaN or infinite norm cannot be normalised into a valid rotation.
    if norm == 0.0 or not np.isfinite(norm):
        raise ValueError(
            f"Cannot normalize quaternion with norm {norm!r}: {q!r}"
        )
    return q / norm


def euler_from_quat(quat_xyzw):
    """
    Convert a quaternion given as (x, y, z, w) into three Euler angles.

    The result is ``np.array([roll, pitch, yaw])`` in **radians**, where
    roll is the rotation about x, pitch about y and yaw about z.

    NOTE(review): the formulas below are the common roll-pitch-yaw
    extraction; whether this matches the exact convention expected by the
    simulator / rotate() skill is not verifiable from this file – confirm.
    The input is used as-is (no normalisation is performed here).
    """
    qx, qy, qz, qw = quat_xyzw

    # Rotation about the x-axis.
    roll_num = 2.0 * (qw * qx + qy * qz)
    roll_den = 1.0 - 2.0 * (qx * qx + qy * qy)
    roll = atan2(roll_num, roll_den)

    # Rotation about the y-axis.  The sine is clamped to [-1, 1] so that
    # rounding error cannot push it outside asin()'s domain.
    sin_pitch = np.clip(2.0 * (qw * qy - qz * qx), -1.0, 1.0)
    pitch = asin(sin_pitch)

    # Rotation about the z-axis.
    yaw_num = 2.0 * (qw * qz + qx * qy)
    yaw_den = 1.0 - 2.0 * (qy * qy + qz * qz)
    yaw = atan2(yaw_num, yaw_den)

    return np.array([roll, pitch, yaw])


# --------------------------------------------------------------------------- #
# Exploration Logic – “Find the Missing Predicate”                            #
# --------------------------------------------------------------------------- #
def exploration_phase(env, task, positions):
    """
    Probe every drawer-like object and record whether it appears locked.

    For each entry of *positions* whose name contains "drawer"
    (case-insensitive) the routine runs move → pick → pull → place using the
    imported skills.  A pull() that returns without raising is recorded as
    ``{'locked': False}``; a pull() that raises is recorded as
    ``{'locked': True}``.  If any other step raises before a result was
    recorded, the entry becomes ``{'locked': None}`` (unknown).

    NOTE(review): no check is made that the drawer actually moved, so a
    pull() that returns normally without effect is still treated as
    "unlocked".

    Parameters
    ----------
    env, task :
        Passed through unchanged to the skill functions.
    positions : mapping
        Object name → position.  Names must be strings; the position is
        forwarded as ``target_pos`` to the skills.

    Returns
    -------
    dict
        ``{name: {'locked': True | False | None}}`` for every drawer that
        was handled.  The loop stops early as soon as a skill reports
        ``done``; drawers not reached by then are absent from the result.
    """
    print("----- [Exploration] start -----")
    knowledge_base = {}           # key → {'locked': bool  | None}

    for name, pos in positions.items():
        # Simple heuristic: treat any object whose name contains “drawer”
        # as a candidate for a pull / lock test.
        if "drawer" not in name.lower():
            continue

        print(f"[Exploration] Visiting candidate: {name}")

        try:
            # 1) Move gripper right in front of the handle
            _, _, done = move(
                env,
                task,
                target_pos=pos,
                approach_distance=0.10,
                max_steps=100,
                threshold=0.01,
                timeout=5.0
            )
            if done:
                print("    Environment signalled task termination during move.")
                break

            # 2) Close on the handle and grasp (re-using generic pick)
            _, _, done = pick(
                env,
                task,
                target_pos=pos,
                approach_distance=0.02,
                max_steps=50,
                threshold=0.005,
                approach_axis='z',
                timeout=3.0
            )
            if done:
                print("    Environment signalled task termination during pick.")
                break

            # 3) TRY a gentle pull – a pull that returns normally is taken
            #    to mean "not locked"; a pull that raises means "locked".
            drawer_was_opened = False
            try:
                _, _, done = pull(env, task)
                drawer_was_opened = True
            except Exception as e:
                # A failure most likely means “locked”.  `done` keeps the
                # (False) value from pick() in this case.
                print(f"    Pull failed: {e}")

            knowledge_base[name] = {
                'locked': not drawer_was_opened
            }

            # The pull itself may end the episode; honour that like every
            # other skill call instead of issuing a further place().
            if done:
                print("    Environment signalled task termination during pull.")
                break

            # 4) Regardless of success we place the handle back / release.
            _, _, done = place(
                env,
                task,
                target_pos=pos,
                approach_distance=0.12,
                max_steps=50,
                threshold=0.01,
                approach_axis='z',
                timeout=3.0
            )
            if done:
                break

        except Exception as e:
            print(f"[Exploration] Unexpected failure when handling {name}: {e}")
            # Only mark the drawer as unknown when nothing was learned yet;
            # a failure in place() must not erase the result of the pull.
            knowledge_base.setdefault(name, {'locked': None})

    print("----- [Exploration] done  -----")
    print("[Exploration] Learned knowledge:", knowledge_base)
    return knowledge_base


# --------------------------------------------------------------------------- #
# Main Entry                                                                  #
# --------------------------------------------------------------------------- #
def run_skeleton_task():
    """
    Reset the task, explore drawer lock state, then open one unlocked drawer.

    Sequence performed:
      1. Create the environment with setup_environment() and reset the task.
      2. Initialise the video writers and wrap ``task.step`` and
         ``task.get_observation`` with the recording wrappers.
      3. Fetch object positions and run exploration_phase() on them.
      4. Pick the first explored entry whose ``'locked'`` value is ``False``
         and run move → pick → rotate → pull → place on it.

    The function returns early (without the closing banner) when no unlocked
    drawer was found or when a skill reports ``done`` before the final
    place().  shutdown_environment() is always called via ``finally``.
    """
    print("===== Starting Skeleton Task =====")

    env, task = setup_environment()
    try:
        # ---- Environment reset & video recording -------------------------- #
        _descriptions, initial_obs = task.reset()
        init_video_writers(initial_obs)

        # Every step / observation call goes through the recording wrappers.
        task.step = recording_step(task.step)
        task.get_observation = recording_get_observation(task.get_observation)

        # ---- Rough object positions --------------------------------------- #
        positions = get_object_positions()
        print("[Info] Object positions retrieved:", positions.keys())

        # ---- Phase 1: exploration ----------------------------------------- #
        lock_info = exploration_phase(env, task, positions)

        # ---- Phase 2: oracle plan ----------------------------------------- #
        # Use the first drawer that exploration marked as definitely unlocked
        # ('locked' is exactly False; None / True entries are skipped).
        target_drawer_name = next(
            (
                drawer
                for drawer, facts in lock_info.items()
                if facts.get('locked') is False
            ),
            None,
        )
        if target_drawer_name is None:
            print("[Oracle] No unlocked drawer discovered – nothing to do.")
            return

        print(f"[Oracle] Operating on unlocked drawer: {target_drawer_name}")
        handle_pos = positions[target_drawer_name]

        # (1) Approach the handle.
        _, _, finished = move(
            env, task,
            target_pos=handle_pos,
            approach_distance=0.10,
            max_steps=120,
            threshold=0.01,
            timeout=5.0,
        )
        if finished:
            print("[Oracle] Task terminated unexpectedly during move()")
            return

        # (2) Grasp the handle; the returned observation supplies the
        #     gripper pose used for the rotation below.
        grasp_obs, _, finished = pick(
            env, task,
            target_pos=handle_pos,
            approach_distance=0.02,
            max_steps=80,
            threshold=0.005,
            approach_axis='z',
            timeout=4.0,
        )
        if finished:
            print("[Oracle] Task terminated unexpectedly during pick()")
            return

        # (3) Rotate the gripper.  The target is the current (normalised)
        #     orientation with its first two components negated; the choice
        #     is arbitrary and only demonstrates the rotate() skill.
        current_quat = normalize_quaternion(grasp_obs.gripper_pose[3:7])
        flipped_quat = current_quat.copy()
        flipped_quat[[0, 1]] *= -1.0
        _, _, finished = rotate(
            env, task,
            target_quat=flipped_quat,
            max_steps=80,
            threshold=0.05,
            timeout=4.0,
        )
        if finished:
            print("[Oracle] Task terminated unexpectedly during rotate()")
            return

        # (4) Pull the drawer that exploration reported as unlocked.
        _, _, finished = pull(env, task)
        if finished:
            print("[Oracle] Task terminated unexpectedly during pull()")
            return

        # (5) Release the handle.
        _, _, finished = place(
            env, task,
            target_pos=handle_pos,
            approach_distance=0.12,
            max_steps=60,
            threshold=0.01,
            approach_axis='z',
            timeout=4.0,
        )

        print(
            "[Oracle] Task reported completion."
            if finished
            else "[Oracle] Finished main plan steps (drawer opened)."
        )

    finally:
        shutdown_environment(env)

    print("===== End of Skeleton Task =====")


# Script entry point: run the full pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_skeleton_task()