initial performance: 8203
episode: 0 training return: tensor(317243.0625, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(2.7665e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(2.4396e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(3.9503e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 1 test_true_pfm: -618 test_simulate_pfm tensor(1.1801e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 4 training return: tensor(2.4993e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(6.7964e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 6 training return: tensor(4.9989e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 7 training return: tensor(0.0652, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 2 test_true_pfm: -604 test_simulate_pfm tensor(0.0093, device='cuda:0', grad_fn=<DivBackward0>)
episode: 8 training return: tensor(1.0680e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 9 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
episode: 10 training return: tensor(3.8495e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 11 training return: tensor(0.0035, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 3 test_true_pfm: -799 test_simulate_pfm tensor(3.4361e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 12 training return: tensor(0.0007, device='cuda:0', grad_fn=<AddBackward0>)
episode: 13 training return: tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
episode: 14 training return: tensor(0.0016, device='cuda:0', grad_fn=<AddBackward0>)
episode: 15 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 4 test_true_pfm: -763 test_simulate_pfm tensor(8.3252e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 16 training return: tensor(5.1132e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 17 training return: tensor(0.0007, device='cuda:0', grad_fn=<AddBackward0>)
episode: 18 training return: tensor(1.4383e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 19 training return: tensor(8.8521, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 5 test_true_pfm: -806 test_simulate_pfm tensor(4.2184e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 20 training return: tensor(0.0163, device='cuda:0', grad_fn=<AddBackward0>)
episode: 21 training return: tensor(2.8563e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 22 training return: tensor(2.5157e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 23 training return: tensor(0.1429, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 6 test_true_pfm: -707 test_simulate_pfm tensor(1.4162e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 24 training return: tensor(0.0068, device='cuda:0', grad_fn=<AddBackward0>)
episode: 25 training return: tensor(1.6858e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 26 training return: tensor(5.4658e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 27 training return: tensor(1.9010e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 7 test_true_pfm: -662 test_simulate_pfm tensor(1.8790e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 28 training return: tensor(6.7686e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 29 training return: tensor(7.7819e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 30 training return: tensor(0.0041, device='cuda:0', grad_fn=<AddBackward0>)
episode: 31 training return: tensor(0.0095, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 8 test_true_pfm: -766 test_simulate_pfm tensor(2.2578e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 32 training return: tensor(0.0165, device='cuda:0', grad_fn=<AddBackward0>)
episode: 33 training return: tensor(0.0014, device='cuda:0', grad_fn=<AddBackward0>)
episode: 34 training return: tensor(5.2532e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 35 training return: tensor(1.3671e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 9 test_true_pfm: -615 test_simulate_pfm tensor(9.5422e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 36 training return: tensor(12.4647, device='cuda:0', grad_fn=<AddBackward0>)
episode: 37 training return: tensor(0.0010, device='cuda:0', grad_fn=<AddBackward0>)
episode: 38 training return: tensor(0.0008, device='cuda:0', grad_fn=<AddBackward0>)
episode: 39 training return: tensor(2.2810e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 10 test_true_pfm: -923 test_simulate_pfm tensor(1.5201e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 40 training return: tensor(5.6663e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 41 training return: tensor(0.0008, device='cuda:0', grad_fn=<AddBackward0>)
episode: 42 training return: tensor(1.2155e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 43 training return: tensor(7.4046e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 11 test_true_pfm: -743 test_simulate_pfm tensor(1.9754e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 44 training return: tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
episode: 45 training return: tensor(1.6164e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 46 training return: tensor(2.5877e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 47 training return: tensor(3.9301e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 12 test_true_pfm: -848 test_simulate_pfm tensor(2.3375e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 48 training return: tensor(6.8096e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 49 training return: tensor(5.6699e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 50 training return: tensor(1.0170e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 51 training return: tensor(0.2243, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 13 test_true_pfm: -659 test_simulate_pfm tensor(4.9418e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 52 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
episode: 53 training return: tensor(3.5692e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 54 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
episode: 55 training return: tensor(3.6716e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 14 test_true_pfm: -710 test_simulate_pfm tensor(1.5845e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 56 training return: tensor(0.1010, device='cuda:0', grad_fn=<AddBackward0>)
episode: 57 training return: tensor(0.1998, device='cuda:0', grad_fn=<AddBackward0>)
episode: 58 training return: tensor(3.8754e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 59 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 15 test_true_pfm: -537 test_simulate_pfm tensor(3.0532e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 60 training return: tensor(5.8171e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 61 training return: tensor(8.0301e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 62 training return: tensor(1.2140e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 63 training return: tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 16 test_true_pfm: -1069 test_simulate_pfm tensor(2.3546e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 64 training return: tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
episode: 65 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 66 training return: tensor(0.0288, device='cuda:0', grad_fn=<AddBackward0>)
episode: 67 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 17 test_true_pfm: -683 test_simulate_pfm tensor(1.0617e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 68 training return: tensor(0.0147, device='cuda:0', grad_fn=<AddBackward0>)
episode: 69 training return: tensor(0.0014, device='cuda:0', grad_fn=<AddBackward0>)
episode: 70 training return: tensor(3.4891e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 71 training return: tensor(0.0024, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 18 test_true_pfm: -821 test_simulate_pfm tensor(9.2041e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 72 training return: tensor(0.0097, device='cuda:0', grad_fn=<AddBackward0>)
episode: 73 training return: tensor(0.0074, device='cuda:0', grad_fn=<AddBackward0>)
episode: 74 training return: tensor(2.4388e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 75 training return: tensor(0.0414, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 19 test_true_pfm: -894 test_simulate_pfm tensor(1.9438e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 76 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 77 training return: tensor(0.0749, device='cuda:0', grad_fn=<AddBackward0>)
episode: 78 training return: tensor(0.1667, device='cuda:0', grad_fn=<AddBackward0>)
episode: 79 training return: tensor(0.0033, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 20 test_true_pfm: -891 test_simulate_pfm tensor(8.8621e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 80 training return: tensor(0.0027, device='cuda:0', grad_fn=<AddBackward0>)
episode: 81 training return: tensor(9.3205e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 82 training return: tensor(2.0022e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 83 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 21 test_true_pfm: -550 test_simulate_pfm tensor(1.5426e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 84 training return: tensor(15.1070, device='cuda:0', grad_fn=<AddBackward0>)
episode: 85 training return: tensor(0.0078, device='cuda:0', grad_fn=<AddBackward0>)
episode: 86 training return: tensor(1.7046e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 87 training return: tensor(2.0327, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 22 test_true_pfm: -650 test_simulate_pfm tensor(1.8769e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 88 training return: tensor(0.0252, device='cuda:0', grad_fn=<AddBackward0>)
episode: 89 training return: tensor(1.3238e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 90 training return: tensor(0.0009, device='cuda:0', grad_fn=<AddBackward0>)
episode: 91 training return: tensor(2.1423e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 23 test_true_pfm: -695 test_simulate_pfm tensor(4.7569e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 92 training return: tensor(0.0046, device='cuda:0', grad_fn=<AddBackward0>)
episode: 93 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 94 training return: tensor(4.9955e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 95 training return: tensor(1.3283e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 24 test_true_pfm: -885 test_simulate_pfm tensor(1.6660e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 96 training return: tensor(2.6272e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 97 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 98 training return: tensor(7.9305e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 99 training return: tensor(2.8553e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 25 test_true_pfm: -453 test_simulate_pfm tensor(4.1549e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 100 training return: tensor(0.0018, device='cuda:0', grad_fn=<AddBackward0>)
episode: 101 training return: tensor(0.0020, device='cuda:0', grad_fn=<AddBackward0>)
episode: 102 training return: tensor(6.0380e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 103 training return: tensor(0.0107, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 26 test_true_pfm: -968 test_simulate_pfm tensor(1.8975e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 104 training return: tensor(0.0043, device='cuda:0', grad_fn=<AddBackward0>)
episode: 105 training return: tensor(7.7611e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 106 training return: tensor(5.5492e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 107 training return: tensor(6.1509e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 27 test_true_pfm: -943 test_simulate_pfm tensor(7.8882e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 108 training return: tensor(0.0006, device='cuda:0', grad_fn=<AddBackward0>)
episode: 109 training return: tensor(9.4838e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 110 training return: tensor(0.0065, device='cuda:0', grad_fn=<AddBackward0>)
episode: 111 training return: tensor(0.0093, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 28 test_true_pfm: -725 test_simulate_pfm tensor(8.1845e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 112 training return: tensor(0.0559, device='cuda:0', grad_fn=<AddBackward0>)
episode: 113 training return: tensor(2.1160e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 114 training return: tensor(0.0015, device='cuda:0', grad_fn=<AddBackward0>)
episode: 115 training return: tensor(0.0019, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 29 test_true_pfm: -658 test_simulate_pfm tensor(1.0238e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 116 training return: tensor(8.3070e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 117 training return: tensor(4.2939e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 118 training return: tensor(2.0220e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 119 training return: tensor(1.1066, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 30 test_true_pfm: -1196 test_simulate_pfm tensor(2.9601e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 120 training return: tensor(1.1396e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 121 training return: tensor(4.4919e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 122 training return: tensor(1.1266e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 123 training return: tensor(6.6138e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 31 test_true_pfm: -730 test_simulate_pfm tensor(9.6834e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 124 training return: tensor(2.2080e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 125 training return: tensor(0.0208, device='cuda:0', grad_fn=<AddBackward0>)
episode: 126 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 127 training return: tensor(1.3676e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 32 test_true_pfm: -612 test_simulate_pfm tensor(6.1549e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 128 training return: tensor(0.0007, device='cuda:0', grad_fn=<AddBackward0>)
episode: 129 training return: tensor(3.2850e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 130 training return: tensor(0.0037, device='cuda:0', grad_fn=<AddBackward0>)
episode: 131 training return: tensor(1.7461e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 33 test_true_pfm: -486 test_simulate_pfm tensor(1.8093e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 132 training return: tensor(2.0027e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 133 training return: tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)
episode: 134 training return: tensor(1.0221e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 135 training return: tensor(0.0035, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 34 test_true_pfm: -774 test_simulate_pfm tensor(2.5408e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 136 training return: tensor(1.8279e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 137 training return: tensor(6.4622e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 138 training return: tensor(1.1292e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 139 training return: tensor(6.6259e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 35 test_true_pfm: -670 test_simulate_pfm tensor(1.7910e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 140 training return: tensor(2.3628e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 141 training return: tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
episode: 142 training return: tensor(7.0845e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 143 training return: tensor(1.7566e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 36 test_true_pfm: -688 test_simulate_pfm tensor(1.0156e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 144 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 145 training return: tensor(1.6326e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 146 training return: tensor(1.1478e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 147 training return: tensor(3.2449e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 37 test_true_pfm: -486 test_simulate_pfm tensor(6.1299e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 148 training return: tensor(5.7798e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 149 training return: tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
episode: 150 training return: tensor(1.8940e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 151 training return: tensor(1.2289e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 38 test_true_pfm: -566 test_simulate_pfm tensor(4.5670e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 152 training return: tensor(4.6474e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 153 training return: tensor(2.8769e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 154 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
episode: 155 training return: tensor(11.6619, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 39 test_true_pfm: -537 test_simulate_pfm tensor(1.1579e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 156 training return: tensor(3.4226e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 157 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 158 training return: tensor(4.6914e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 159 training return: tensor(1.3749e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 40 test_true_pfm: -601 test_simulate_pfm tensor(8.8592e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 160 training return: tensor(0.0096, device='cuda:0', grad_fn=<AddBackward0>)
episode: 161 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
episode: 162 training return: tensor(0.0203, device='cuda:0', grad_fn=<AddBackward0>)
episode: 163 training return: tensor(0.0030, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 41 test_true_pfm: -777 test_simulate_pfm tensor(9.6459e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 164 training return: tensor(3.5458e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 165 training return: tensor(1.9482e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 166 training return: tensor(2.3983e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 167 training return: tensor(1.6493e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 42 test_true_pfm: -752 test_simulate_pfm tensor(1.5400e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 168 training return: tensor(5.2445e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 169 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 170 training return: tensor(4.1277e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 171 training return: tensor(3.5834e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 43 test_true_pfm: -525 test_simulate_pfm tensor(1.5117e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 172 training return: tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
episode: 173 training return: tensor(1.6675e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 174 training return: tensor(4.0441e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 175 training return: tensor(0.0046, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 44 test_true_pfm: -505 test_simulate_pfm tensor(8.1106e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 176 training return: tensor(9.7035e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 177 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 178 training return: tensor(0.0009, device='cuda:0', grad_fn=<AddBackward0>)
episode: 179 training return: tensor(1.5192e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 45 test_true_pfm: -539 test_simulate_pfm tensor(1.7102e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 180 training return: tensor(8.3956e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 181 training return: tensor(1.1618e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 182 training return: tensor(1.9102e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 183 training return: tensor(2.1876e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 46 test_true_pfm: -602 test_simulate_pfm tensor(5.8504e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 184 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 185 training return: tensor(3.9087e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 186 training return: tensor(5.2333e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 187 training return: tensor(0.0030, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 47 test_true_pfm: -149 test_simulate_pfm tensor(1.4592e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 188 training return: tensor(7.8153e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 189 training return: tensor(0.0007, device='cuda:0', grad_fn=<AddBackward0>)
episode: 190 training return: tensor(2.2182e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 191 training return: tensor(2.2646e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 48 test_true_pfm: -906 test_simulate_pfm tensor(6.4662e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 192 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 193 training return: tensor(0.0745, device='cuda:0', grad_fn=<AddBackward0>)
episode: 194 training return: tensor(3.0018e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 195 training return: tensor(4.8273e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 49 test_true_pfm: -567 test_simulate_pfm tensor(4.1154e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 196 training return: tensor(0.0065, device='cuda:0', grad_fn=<AddBackward0>)
episode: 197 training return: tensor(1.5992e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 198 training return: tensor(3.6198e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 199 training return: tensor(1.1838e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 50 test_true_pfm: -536 test_simulate_pfm tensor(8.0102e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 200 training return: tensor(3.1824e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 201 training return: tensor(4.6686e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 202 training return: tensor(4.7825e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 203 training return: tensor(0.0039, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 51 test_true_pfm: -515 test_simulate_pfm tensor(4.4039e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 204 training return: tensor(7.0885e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 205 training return: tensor(5.4147e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 206 training return: tensor(3.5157e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 207 training return: tensor(0.0006, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 52 test_true_pfm: -527 test_simulate_pfm tensor(2.0886e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 208 training return: tensor(4.9633e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 209 training return: tensor(2.9010e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 210 training return: tensor(2.5798e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 211 training return: tensor(5.5421e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 53 test_true_pfm: -630 test_simulate_pfm tensor(6.6053e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 212 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 213 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
episode: 214 training return: tensor(4.0445e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 215 training return: tensor(3.7095e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 54 test_true_pfm: -88 test_simulate_pfm tensor(4.3675e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 216 training return: tensor(1.0369e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 217 training return: tensor(5.3585e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 218 training return: tensor(1.4310e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 219 training return: tensor(0.0074, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 55 test_true_pfm: -382 test_simulate_pfm tensor(8.3616e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 220 training return: tensor(2.8209e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 221 training return: tensor(0.0333, device='cuda:0', grad_fn=<AddBackward0>)
episode: 222 training return: tensor(8.5148e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 223 training return: tensor(0.0006, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 56 test_true_pfm: -580 test_simulate_pfm tensor(2.5547e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 224 training return: tensor(4.6946e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 225 training return: tensor(4.4627e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 226 training return: tensor(0.0014, device='cuda:0', grad_fn=<AddBackward0>)
episode: 227 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 57 test_true_pfm: -629 test_simulate_pfm tensor(2.0816e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 228 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 229 training return: tensor(2.3440e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 230 training return: tensor(6.1825e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 231 training return: tensor(3.5111e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 58 test_true_pfm: -888 test_simulate_pfm tensor(5.4911e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 232 training return: tensor(7.8148e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 233 training return: tensor(2.0544e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 234 training return: tensor(4.8941e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 235 training return: tensor(5.5553e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 59 test_true_pfm: -600 test_simulate_pfm tensor(3.3038e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 236 training return: tensor(1.6859e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 237 training return: tensor(1.4068e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 238 training return: tensor(1.1302e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 239 training return: tensor(5.8764e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 60 test_true_pfm: -372 test_simulate_pfm tensor(2.0874e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 240 training return: tensor(0.0007, device='cuda:0', grad_fn=<AddBackward0>)
episode: 241 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 242 training return: tensor(7.7128e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 243 training return: tensor(5.8824e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 61 test_true_pfm: -377 test_simulate_pfm tensor(2.7033e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 244 training return: tensor(3.9682e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 245 training return: tensor(8.2159e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 246 training return: tensor(1.2982e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 247 training return: tensor(1.0968e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 62 test_true_pfm: -526 test_simulate_pfm tensor(2.4424e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 248 training return: tensor(1.1786e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 249 training return: tensor(2.0416e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 250 training return: tensor(1.6373e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 251 training return: tensor(1.8228e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 63 test_true_pfm: -581 test_simulate_pfm tensor(1.0368e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 252 training return: tensor(1.2318e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 253 training return: tensor(1.5254e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 254 training return: tensor(3.1838e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 255 training return: tensor(8.9710e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 64 test_true_pfm: -617 test_simulate_pfm tensor(4.8517e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 256 training return: tensor(1.1332e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 257 training return: tensor(4.0772e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 258 training return: tensor(2.0424e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 259 training return: tensor(1.1802e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 65 test_true_pfm: -563 test_simulate_pfm tensor(1.7974e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 260 training return: tensor(8.4305e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 261 training return: tensor(3.1460e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 262 training return: tensor(2.2503e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 263 training return: tensor(9.6833e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 66 test_true_pfm: -642 test_simulate_pfm tensor(3.1533e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 264 training return: tensor(1.3242e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 265 training return: tensor(1.4322e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 266 training return: tensor(0.0662, device='cuda:0', grad_fn=<AddBackward0>)
episode: 267 training return: tensor(0.0009, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 67 test_true_pfm: -219 test_simulate_pfm tensor(1.8198e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 268 training return: tensor(0.7271, device='cuda:0', grad_fn=<AddBackward0>)
episode: 269 training return: tensor(4.3111e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 270 training return: tensor(6.6956e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 271 training return: tensor(12.1627, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 68 test_true_pfm: -390 test_simulate_pfm tensor(2.6674e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 272 training return: tensor(0.0273, device='cuda:0', grad_fn=<AddBackward0>)
episode: 273 training return: tensor(4.8297e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 274 training return: tensor(743.9111, device='cuda:0', grad_fn=<AddBackward0>)
episode: 275 training return: tensor(9.8203e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 69 test_true_pfm: -506 test_simulate_pfm tensor(5.7428e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 276 training return: tensor(6.4323e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 277 training return: tensor(1.2106e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 278 training return: tensor(0.0049, device='cuda:0', grad_fn=<AddBackward0>)
episode: 279 training return: tensor(9.6297e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 70 test_true_pfm: -587 test_simulate_pfm tensor(7.5733e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 280 training return: tensor(3.2396e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 281 training return: tensor(3.3973e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 282 training return: tensor(1.6177, device='cuda:0', grad_fn=<AddBackward0>)
episode: 283 training return: tensor(3.6279e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 71 test_true_pfm: -234 test_simulate_pfm tensor(7.3164e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 284 training return: tensor(2.2376, device='cuda:0', grad_fn=<AddBackward0>)
episode: 285 training return: tensor(3.4381, device='cuda:0', grad_fn=<AddBackward0>)
episode: 286 training return: tensor(0.2452, device='cuda:0', grad_fn=<AddBackward0>)
episode: 287 training return: tensor(0.0342, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 72 test_true_pfm: 80 test_simulate_pfm tensor(7.8259e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 288 training return: tensor(0.0008, device='cuda:0', grad_fn=<AddBackward0>)
episode: 289 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 290 training return: tensor(3.2821, device='cuda:0', grad_fn=<AddBackward0>)
episode: 291 training return: tensor(7.1981e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 73 test_true_pfm: -430 test_simulate_pfm tensor(2.1005e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 292 training return: tensor(3.1545e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 293 training return: tensor(4.1443e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 294 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 295 training return: tensor(0.0012, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 74 test_true_pfm: -630 test_simulate_pfm tensor(0.0002, device='cuda:0', grad_fn=<DivBackward0>)
episode: 296 training return: tensor(3.1955e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 297 training return: tensor(0.0772, device='cuda:0', grad_fn=<AddBackward0>)
episode: 298 training return: tensor(1.1703e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 299 training return: tensor(0.0066, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 75 test_true_pfm: -215 test_simulate_pfm tensor(2.4190e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 300 training return: tensor(0.0014, device='cuda:0', grad_fn=<AddBackward0>)
episode: 301 training return: tensor(0.0478, device='cuda:0', grad_fn=<AddBackward0>)
episode: 302 training return: tensor(0.0010, device='cuda:0', grad_fn=<AddBackward0>)
episode: 303 training return: tensor(1.7609e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 76 test_true_pfm: -400 test_simulate_pfm tensor(5.8416e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 304 training return: tensor(0.0066, device='cuda:0', grad_fn=<AddBackward0>)
episode: 305 training return: tensor(3.1063e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 306 training return: tensor(1.8453, device='cuda:0', grad_fn=<AddBackward0>)
episode: 307 training return: tensor(8.6285e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 77 test_true_pfm: -589 test_simulate_pfm tensor(1.6711e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 308 training return: tensor(4.4493e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 309 training return: tensor(0.1417, device='cuda:0', grad_fn=<AddBackward0>)
episode: 310 training return: tensor(0.0068, device='cuda:0', grad_fn=<AddBackward0>)
episode: 311 training return: tensor(71.6219, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 78 test_true_pfm: 49 test_simulate_pfm tensor(6.8097e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 312 training return: tensor(0.0018, device='cuda:0', grad_fn=<AddBackward0>)
episode: 313 training return: tensor(0.1677, device='cuda:0', grad_fn=<AddBackward0>)
episode: 314 training return: tensor(0.8596, device='cuda:0', grad_fn=<AddBackward0>)
episode: 315 training return: tensor(0.0020, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 79 test_true_pfm: -836 test_simulate_pfm tensor(2.5850e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 316 training return: tensor(4.2300e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 317 training return: tensor(3.2039e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 318 training return: tensor(1.3993e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 319 training return: tensor(4.1879e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 80 test_true_pfm: -333 test_simulate_pfm tensor(1.1947e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 320 training return: tensor(1.1144e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 321 training return: tensor(5.2454e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 322 training return: tensor(0.0048, device='cuda:0', grad_fn=<AddBackward0>)
episode: 323 training return: tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 81 test_true_pfm: -493 test_simulate_pfm tensor(6.1079e-11, device='cuda:0', grad_fn=<DivBackward0>)
