import torch

def calculate_cumulative_rewards(reward_tensor, gamma=0.9):
    """
    Compute the discounted cumulative return for every step of a reward tensor.

    Walking backwards along the context dimension, the value stored at step
    ``t`` is ``reward[t] + gamma * value[t + 1]``, where the value past the
    final step is taken to be zero.

    Args:
        reward_tensor (torch.Tensor): Shape [batch_size, context_length, 1].
        gamma (float): Discount factor applied to the following step's return.

    Returns:
        torch.Tensor: Tensor with the same shape and device as
        ``reward_tensor``. Floating-point inputs keep their dtype; integer
        and bool inputs are promoted to a floating-point dtype when ``gamma``
        is a float, so fractional returns are not truncated.
    """
    batch_size, context_length, _ = reward_tensor.shape
    # Store results in the promoted dtype: writing fractional returns into an
    # integer buffer would silently truncate them.
    result_dtype = torch.result_type(reward_tensor, gamma)
    calculated_result = torch.zeros_like(reward_tensor, dtype=result_dtype)
    # Running return of the step after the current one; zero past the end.
    # Allocated directly on the input's device to avoid a host-side copy.
    cumulative_rewards = torch.zeros(batch_size, 1, device=reward_tensor.device)
    # The recurrence depends on the following step, so iterate last to first.
    for i in reversed(range(context_length)):
        cumulative_rewards = reward_tensor[:, i, :] + gamma * cumulative_rewards
        calculated_result[:, i, :] = cumulative_rewards
    return calculated_result
        
    
    