diff --git a/freedpp/train/sac.py b/freedpp/train/sac.py
--- a/freedpp/train/sac.py
+++ b/freedpp/train/sac.py
@@ -88,7 +88,7 @@ class SAC:
             entropy = action.entropy()
             _, q_target = self.critic_target(next_state, action.index, from_index=True)
             alpha = self.log_alpha.exp().item()
-            target = reward + self.gamma * (1 - done) * q_target
+            target = reward + self.gamma * (1 - done) * (q_target + alpha * entropy)
 
 loss_critic = sum(map(partial(F.mse_loss, target, reduction='none'), q_values))
 