episode: 0 training return: tensor(-3050.8545, device='cuda:0')
episode: 1 training return: tensor(-3535.9407, device='cuda:0')
episode: 2 training return: tensor(-4195.5693, device='cuda:0')
episode: 3 training return: tensor(-3227.4429, device='cuda:0')
