from typing import Optional

import numpy as np


def reward_component_movement(
    movement_direction: np.ndarray,
    com_prev: np.ndarray,
    com_current: np.ndarray,
    observation_frequency: int,
) -> float:
    """Return the center-of-mass velocity projected onto the target heading.

    The displacement ``com_current - com_prev`` is multiplied by the
    observation frequency (i.e. divided by dt = 1/f) and then dotted with the
    unit-length heading, so the result is positive when moving along the
    heading and negative when moving against it.

    Args:
        movement_direction: Desired heading, shape [3]. It is normalized
            internally; if its norm is below 1e-6 the +X axis is used instead.
        com_prev: Center of mass position at the previous observation, shape [3].
        com_current: Center of mass position at the current observation, shape [3].
        observation_frequency: Observation rate in Hz; must be positive.

    Returns:
        Signed speed along the heading as a Python float.

    Raises:
        ValueError: If any vector is not shape [3] or the frequency is not
            positive.
    """
    heading = np.asarray(movement_direction, dtype=np.float32)
    if heading.shape != (3,):
        raise ValueError("movement_direction must be shape [3]")

    heading_length = float(np.linalg.norm(heading))
    if heading_length < 1e-6:
        # Degenerate heading: fall back to the +X axis, which is already unit length.
        heading_unit = np.array([1.0, 0.0, 0.0], dtype=np.float32)
    else:
        heading_unit = heading / heading_length

    com_before, com_after = (
        np.asarray(position, dtype=np.float32)
        for position in (com_prev, com_current)
    )
    if not (com_before.shape == (3,) and com_after.shape == (3,)):
        raise ValueError("com_prev and com_current must be shape [3]")

    rate_hz = float(observation_frequency)
    if rate_hz <= 0:
        raise ValueError("observation_frequency must be positive")

    # Velocity = displacement / dt, with dt = 1 / rate_hz.
    displacement = com_after - com_before
    velocity = displacement * rate_hz
    return float(velocity.dot(heading_unit))


def reward_component_standing(
    body_vector: Optional[np.ndarray],
    world_up_vector: np.ndarray,
    cos_threshold: float,
) -> float:
    """Score whether the body's up axis is aligned with the world's up axis.

    Alignment is the cosine of the angle between the two vectors; the check
    passes only when that cosine is strictly greater than ``cos_threshold``.

    Args:
        body_vector: Body up vector, shape [3], or None when it is unavailable.
        world_up_vector: World up vector, shape [3].
        cos_threshold: Cosine similarity that must be exceeded to count as
            standing.

    Returns:
        1.0 when the cosine similarity exceeds the threshold. -1.0 otherwise,
        including when ``body_vector`` is None or either vector has a norm
        below 1e-6.

    Raises:
        ValueError: If either vector is not shape [3].
    """
    if body_vector is None:
        return -1.0

    body_up = np.asarray(body_vector, dtype=np.float32)
    reference_up = np.asarray(world_up_vector, dtype=np.float32)
    if not (body_up.shape == (3,) and reference_up.shape == (3,)):
        raise ValueError("body_vector and world_up_vector must be shape [3]")

    body_length = float(np.linalg.norm(body_up))
    reference_length = float(np.linalg.norm(reference_up))
    # A (near-)zero vector has no direction, so it can never count as upright.
    if any(length < 1e-6 for length in (body_length, reference_length)):
        return -1.0

    length_product = body_length * reference_length
    cosine = float(np.dot(body_up, reference_up) / length_product)
    if cosine > cos_threshold:
        return 1.0
    return -1.0


def reward_component_height(body_height: float, height_threshold: float) -> float:
    """Score whether the body is strictly above a height threshold.

    Args:
        body_height: Current body height (z value).
        height_threshold: Height that must be exceeded.

    Returns:
        1.0 when ``body_height`` is greater than ``height_threshold``;
        -1.0 when it is equal to or below it.
    """
    if body_height > height_threshold:
        return 1.0
    return -1.0


def reward_component_action_cost(
    current_action: np.ndarray,
    prev_action: Optional[np.ndarray],
    *,
    max_action_delta: float = 4.0,
) -> float:
    """Compute the action change cost, normalized to the range [-1, 0].

    The cost is the negated mean of ``|current - previous| / max_action_delta``
    over all entries, clipped to [-1, 0]. A value of 0 means the action did not
    change; -1 means the mean change reached (or exceeded) ``max_action_delta``.

    Args:
        current_action: Current discrete action array, shape [num_joints].
        prev_action: Previous action array, or None on the first step.
        max_action_delta: Per-entry change that maps to a full cost of 1.
            Defaults to 4.0, the divisor this function has always used
            (presumably the largest possible step of the discrete action
            encoding; confirm against the action space).

    Returns:
        Negative mean normalized action delta in [-1, 0]. Returns 0.0 when
        there is no previous action or when the action arrays are empty.

    Raises:
        ValueError: If ``max_action_delta`` is not a positive finite number,
            or if the two action arrays differ in shape.
    """
    # float() keeps the divisor a plain Python scalar so the float32 action
    # arithmetic below is not promoted to a wider dtype.
    scale = float(max_action_delta)
    if not np.isfinite(scale) or scale <= 0.0:
        raise ValueError("max_action_delta must be a positive finite number")

    if prev_action is None:
        return 0.0

    current = np.asarray(current_action, dtype=np.float32)
    previous = np.asarray(prev_action, dtype=np.float32)
    if current.shape != previous.shape:
        raise ValueError("current_action and prev_action must have the same shape")

    # np.mean of an empty array is NaN (with a RuntimeWarning); with nothing to
    # compare there is no change, so the cost is zero.
    if current.size == 0:
        return 0.0

    delta = np.abs((current - previous) / scale)
    cost = -float(np.mean(delta))
    return float(np.clip(cost, -1.0, 0.0))
