['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'expert', '--seed', '0', '--data', '100000']
132.3172051062231
episode: 0 training return: tensor(99.5840, device='cuda:0')
episode: 1 training return: tensor(104.7600, device='cuda:0')
episode: 2 training return: tensor(103.2435, device='cuda:0')
episode: 3 training return: tensor(99.2081, device='cuda:0')
epoch: 1 test_true_pfm: 127.46953095486124 sim_pfm: 90.10654205085011
episode: 4 training return: tensor(112.0314, device='cuda:0')
episode: 5 training return: tensor(98.1870, device='cuda:0')
episode: 6 training return: tensor(110.2223, device='cuda:0')
episode: 7 training return: tensor(112.3823, device='cuda:0')
epoch: 2 test_true_pfm: 127.17493194304711 sim_pfm: 75.70593599612475
episode: 8 training return: tensor(99.4620, device='cuda:0')
episode: 9 training return: tensor(121.4443, device='cuda:0')
