import numpy as np

def four_tank_reward_decomposed(self, x, u, con):
    """Return the reward split into set-point tracking terms and a control term.

    Every state named in ``self.env_params["SP"]`` contributes one tracking
    cost: the negative squared error between the min-max normalised state and
    the min-max normalised set-point at time index ``self.t``, multiplied by
    ``self.env_params["r_scale"][name]`` (1 when absent). One further cost
    penalises the change of the normalised input with respect to the input
    seen on the previous call.

    Args:
        self: Environment-like object. Reads ``env_params`` (keys ``"SP"``,
            ``"o_space"``, ``"a_space"`` and optionally ``"r_scale"``),
            ``model.info()["states"]``, ``SP`` and ``t``; reads and writes
            ``u_prev``.
        x: State vector, indexed by position in ``model.info()["states"]``.
        u: Current input. Must support elementwise arithmetic with the
            ``"a_space"`` bounds (e.g. a NumPy array).
        con: Unused; kept so the signature stays unchanged for callers.

    Returns:
        A tuple ``(tracking_cost_1, ..., tracking_cost_n, control_cost)`` with
        the tracking costs in the iteration order of ``env_params["SP"]``.
        When that leaves a single component (no set-points configured) the
        bare scalar is returned instead of a 1-tuple.

    Side effects:
        Stores a copy of ``u`` in ``self.u_prev`` for the next call. On the
        first call there is no previous input, so the control cost is zero.
    """
    rate_weight = 0.1  # weight on the squared change of the normalised input

    # A copy is stored (not a reference) so that a caller which reuses or
    # mutates its action buffer in place cannot overwrite our "previous" input.
    if not hasattr(self, 'u_prev'):
        self.u_prev = np.copy(u)

    env_params = self.env_params
    state_names = self.model.info()["states"]
    obs_low = env_params["o_space"]["low"]
    obs_high = env_params["o_space"]["high"]
    r_scale = env_params.get("r_scale", {})

    # Set-point tracking cost for each controlled variable (e.g. h1, h2).
    state_costs = []
    for k in env_params["SP"]:
        i = state_names.index(k)
        low = obs_low[i]
        span = obs_high[i] - low

        x_normalized = (x[i] - low) / span
        # Only the current time step of the set-point trajectory is needed.
        setpoint_normalized = (self.SP[k][self.t] - low) / span

        # NOTE(review): the error is summed *before* squaring. For a scalar
        # state entry this is just the squared error; confirm the intent if
        # x[i] can ever be a vector.
        state_error = (np.sum(x_normalized - setpoint_normalized) ** 2) * r_scale.get(k, 1)
        state_costs.append(-state_error)

    # Input-rate penalty on min-max normalised inputs.
    a_low = env_params["a_space"]["low"]
    a_span = env_params["a_space"]["high"] - a_low
    u_normalized = (u - a_low) / a_span
    u_prev_norm = (self.u_prev - a_low) / a_span
    self.u_prev = np.copy(u)

    control_cost = -np.sum(rate_weight * (u_normalized - u_prev_norm) ** 2)

    components = tuple(state_costs) + (control_cost,)

    # Keep the scalar return format when there is only one component.
    return components[0] if len(components) == 1 else components
