['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'baseline', '--traj', 'expert', '--seed', '0']
episode: 0 training return: tensor(-998.3735, device='cuda:0')
episode: 1 training return: tensor(-995.5990, device='cuda:0')
episode: 2 training return: tensor(-994.3307, device='cuda:0')
episode: 3 training return: tensor(-999.4863, device='cuda:0')
epoch: 1 test_true_pfm: 2.7737722086428 sim_pfm: -997.4981498718262
episode: 4 training return: tensor(-999.9888, device='cuda:0')
episode: 5 training return: tensor(-999.1409, device='cuda:0')
episode: 6 training return: tensor(-999.9100, device='cuda:0')
episode: 7 training return: tensor(-998.8019, device='cuda:0')
epoch: 2 test_true_pfm: -7.1117482363067595 sim_pfm: -998.4822010397911
episode: 8 training return: tensor(-996.0434, device='cuda:0')
episode: 9 training return: tensor(-998.6669, device='cuda:0')
episode: 10 training return: tensor(-992.7944, device='cuda:0')
episode: 11 training return: tensor(-959.2153, device='cuda:0')
epoch: 3 test_true_pfm: 238.28095713844687 sim_pfm: -964.5581487474652
episode: 12 training return: tensor(-996.3879, device='cuda:0')
episode: 13 training return: tensor(-997.3432, device='cuda:0')
episode: 14 training return: tensor(-950.2977, device='cuda:0')
episode: 15 training return: tensor(-934.3101, device='cuda:0')
epoch: 4 test_true_pfm: 236.6592996984779 sim_pfm: -914.2980549702964
episode: 16 training return: tensor(-910.7679, device='cuda:0')
episode: 17 training return: tensor(-906.5930, device='cuda:0')
episode: 18 training return: tensor(-909.5102, device='cuda:0')
episode: 19 training return: tensor(-908.4229, device='cuda:0')
epoch: 5 test_true_pfm: 290.90560063504404 sim_pfm: -898.8051525861956
episode: 20 training return: tensor(-894.4858, device='cuda:0')
episode: 21 training return: tensor(-892.0298, device='cuda:0')
episode: 22 training return: tensor(-918.7050, device='cuda:0')
episode: 23 training return: tensor(-891.2036, device='cuda:0')
