['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '0', '--data', '30000', '--sub']
5137.531234433067
episode: 0 training return: tensor(176.9469, device='cuda:0')
episode: 1 training return: tensor(52.5423, device='cuda:0')
episode: 2 training return: tensor(2.1221, device='cuda:0')
episode: 3 training return: tensor(-24.9353, device='cuda:0')
