initial performance: -3
episode: 0 training return: tensor(9.6639e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(3.3025e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 1 test_true_pfm: -387 test_simulate_pfm tensor(7.0747e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 4 training return: tensor(7.2885e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(3.2737e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 6 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 7 training return: tensor(0.0017, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 2 test_true_pfm: -586 test_simulate_pfm tensor(5.8761e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 8 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 9 training return: tensor(3.0337e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 10 training return: tensor(1.5747, device='cuda:0', grad_fn=<AddBackward0>)
episode: 11 training return: tensor(0.0011, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 3 test_true_pfm: -594 test_simulate_pfm tensor(2.1599e-29, device='cuda:0', grad_fn=<DivBackward0>)
episode: 12 training return: tensor(4.6098e-22, device='cuda:0', grad_fn=<AddBackward0>)
episode: 13 training return: tensor(2.8156e-24, device='cuda:0', grad_fn=<AddBackward0>)
episode: 14 training return: tensor(2.4481e-24, device='cuda:0', grad_fn=<AddBackward0>)
episode: 15 training return: tensor(3.5458e-24, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 4 test_true_pfm: -595 test_simulate_pfm tensor(2.0340e-30, device='cuda:0', grad_fn=<DivBackward0>)
episode: 16 training return: tensor(4.6227e-23, device='cuda:0', grad_fn=<AddBackward0>)
episode: 17 training return: tensor(1.3586e-23, device='cuda:0', grad_fn=<AddBackward0>)
episode: 18 training return: tensor(1.7491e-23, device='cuda:0', grad_fn=<AddBackward0>)
episode: 19 training return: tensor(1.2415e-21, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 5 test_true_pfm: -594 test_simulate_pfm tensor(3.7970e-30, device='cuda:0', grad_fn=<DivBackward0>)
episode: 20 training return: tensor(1.6648e-23, device='cuda:0', grad_fn=<AddBackward0>)
episode: 21 training return: tensor(8.5270e-24, device='cuda:0', grad_fn=<AddBackward0>)
episode: 22 training return: tensor(6.5398e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 23 training return: tensor(0.0021, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 6 test_true_pfm: -594 test_simulate_pfm tensor(1.0373e-25, device='cuda:0', grad_fn=<DivBackward0>)
episode: 24 training return: tensor(7.2074e-22, device='cuda:0', grad_fn=<AddBackward0>)
episode: 25 training return: tensor(1.1287e-23, device='cuda:0', grad_fn=<AddBackward0>)
episode: 26 training return: tensor(5.0192e-24, device='cuda:0', grad_fn=<AddBackward0>)
episode: 27 training return: tensor(3.1841e-23, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 7 test_true_pfm: -594 test_simulate_pfm tensor(7.0375e-30, device='cuda:0', grad_fn=<DivBackward0>)
episode: 28 training return: tensor(2.9083e-23, device='cuda:0', grad_fn=<AddBackward0>)
episode: 29 training return: tensor(3.8311e-24, device='cuda:0', grad_fn=<AddBackward0>)
episode: 30 training return: tensor(4.3775e-24, device='cuda:0', grad_fn=<AddBackward0>)
episode: 31 training return: tensor(4.0944e-24, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 8 test_true_pfm: -595 test_simulate_pfm tensor(3.1399e-29, device='cuda:0', grad_fn=<DivBackward0>)
episode: 32 training return: tensor(1.0599e-24, device='cuda:0', grad_fn=<AddBackward0>)
episode: 33 training return: tensor(8.8234e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 34 training return: tensor(1.8811e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 35 training return: tensor(1.9012e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 9 test_true_pfm: -594 test_simulate_pfm tensor(9.0941e-29, device='cuda:0', grad_fn=<DivBackward0>)
episode: 36 training return: tensor(2.3033e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 37 training return: tensor(1.8055e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 38 training return: tensor(2.1770e-24, device='cuda:0', grad_fn=<AddBackward0>)
episode: 39 training return: tensor(3.5980e-24, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 10 test_true_pfm: -594 test_simulate_pfm tensor(4.2988e-29, device='cuda:0', grad_fn=<DivBackward0>)
episode: 40 training return: tensor(3.8201e-24, device='cuda:0', grad_fn=<AddBackward0>)
episode: 41 training return: tensor(4.5903e-24, device='cuda:0', grad_fn=<AddBackward0>)
episode: 42 training return: tensor(1.7011e-23, device='cuda:0', grad_fn=<AddBackward0>)
episode: 43 training return: tensor(1.6083e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 11 test_true_pfm: -596 test_simulate_pfm tensor(1.9742e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 44 training return: tensor(5.6422e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 45 training return: tensor(1.5738e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 46 training return: tensor(1.5344e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 47 training return: tensor(3.3410e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 12 test_true_pfm: -596 test_simulate_pfm tensor(1.6285e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 48 training return: tensor(2.4611e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 49 training return: tensor(1.8820e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 50 training return: tensor(5.2337e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 51 training return: tensor(1.6826e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 13 test_true_pfm: -596 test_simulate_pfm tensor(7.2635e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 52 training return: tensor(1.0651e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 53 training return: tensor(7.3250e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 54 training return: tensor(2.0864e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 55 training return: tensor(2.4101e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 14 test_true_pfm: -597 test_simulate_pfm tensor(1.4662e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 56 training return: tensor(1.5887e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 57 training return: tensor(5.5904e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 58 training return: tensor(3.2336e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 59 training return: tensor(2.6581e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 15 test_true_pfm: -596 test_simulate_pfm tensor(5.3804e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 60 training return: tensor(3.3884e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 61 training return: tensor(7.5153e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 62 training return: tensor(1.4294e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 63 training return: tensor(1.0191e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 16 test_true_pfm: -596 test_simulate_pfm tensor(5.4254e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 64 training return: tensor(2.7685e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 65 training return: tensor(1.1989e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 66 training return: tensor(4.0478e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 67 training return: tensor(3.3207e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 17 test_true_pfm: -596 test_simulate_pfm tensor(7.5644e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 68 training return: tensor(1.7061e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 69 training return: tensor(1.8458e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 70 training return: tensor(5.6009e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 71 training return: tensor(5.9246e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 18 test_true_pfm: -596 test_simulate_pfm tensor(4.9019e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 72 training return: tensor(1.0350e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 73 training return: tensor(1.1532e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 74 training return: tensor(7.3005e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 75 training return: tensor(6.9399e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 19 test_true_pfm: -596 test_simulate_pfm tensor(4.7056e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 76 training return: tensor(9.0168e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 77 training return: tensor(2.7120e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 78 training return: tensor(7.9952e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 79 training return: tensor(4.2915e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 20 test_true_pfm: -596 test_simulate_pfm tensor(4.2111e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 80 training return: tensor(2.9586e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 81 training return: tensor(4.9680e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 82 training return: tensor(3.7005e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 83 training return: tensor(9.8660e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 21 test_true_pfm: -596 test_simulate_pfm tensor(2.7240e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 84 training return: tensor(1.0393e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 85 training return: tensor(3.5311e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 86 training return: tensor(3.1443e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 87 training return: tensor(3.3465e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 22 test_true_pfm: -597 test_simulate_pfm tensor(1.3435e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 88 training return: tensor(6.0962e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 89 training return: tensor(2.4743e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 90 training return: tensor(1.1630e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 91 training return: tensor(7.8681e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 23 test_true_pfm: -596 test_simulate_pfm tensor(8.4890e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 92 training return: tensor(1.4104e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 93 training return: tensor(2.7440e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 94 training return: tensor(6.2321e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 95 training return: tensor(3.1013e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 24 test_true_pfm: -596 test_simulate_pfm tensor(6.7427e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 96 training return: tensor(9.1187e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 97 training return: tensor(2.3702e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 98 training return: tensor(2.9829e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 99 training return: tensor(2.5208e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 25 test_true_pfm: -596 test_simulate_pfm tensor(1.4577e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 100 training return: tensor(2.8477e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 101 training return: tensor(3.0035e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 102 training return: tensor(4.1661e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 103 training return: tensor(1.5785e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 26 test_true_pfm: -596 test_simulate_pfm tensor(4.3178e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 104 training return: tensor(3.1914e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 105 training return: tensor(6.6412e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 106 training return: tensor(1.5244e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 107 training return: tensor(7.8421e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 27 test_true_pfm: -596 test_simulate_pfm tensor(1.4671e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 108 training return: tensor(1.3412e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 109 training return: tensor(2.8425e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 110 training return: tensor(9.9461e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 111 training return: tensor(1.2668e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 28 test_true_pfm: -596 test_simulate_pfm tensor(1.0905e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 112 training return: tensor(6.1591e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 113 training return: tensor(5.2548e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 114 training return: tensor(2.9846e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 115 training return: tensor(5.1553e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 29 test_true_pfm: -596 test_simulate_pfm tensor(6.7644e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 116 training return: tensor(6.2893e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 117 training return: tensor(1.0280e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 118 training return: tensor(5.3058e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 119 training return: tensor(1.4313e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 30 test_true_pfm: -596 test_simulate_pfm tensor(2.0534e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 120 training return: tensor(3.0800e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 121 training return: tensor(6.4884e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 122 training return: tensor(5.7806e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 123 training return: tensor(2.4239e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 31 test_true_pfm: -596 test_simulate_pfm tensor(6.9001e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 124 training return: tensor(9.2301e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 125 training return: tensor(1.6179e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 126 training return: tensor(1.5751e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 127 training return: tensor(1.3184e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 32 test_true_pfm: -596 test_simulate_pfm tensor(6.5257e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 128 training return: tensor(5.4036e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 129 training return: tensor(1.2041e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 130 training return: tensor(3.5969e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 131 training return: tensor(2.0957e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 33 test_true_pfm: -595 test_simulate_pfm tensor(6.3659e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 132 training return: tensor(4.4755e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 133 training return: tensor(4.1082e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 134 training return: tensor(1.8728e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 135 training return: tensor(1.4963e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 34 test_true_pfm: -596 test_simulate_pfm tensor(3.4195e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 136 training return: tensor(1.3160e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 137 training return: tensor(2.9659e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 138 training return: tensor(5.4442e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 139 training return: tensor(2.8768e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 35 test_true_pfm: -597 test_simulate_pfm tensor(4.4544e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 140 training return: tensor(3.3682e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 141 training return: tensor(2.2850e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 142 training return: tensor(4.7294e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 143 training return: tensor(1.0356e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 36 test_true_pfm: -596 test_simulate_pfm tensor(5.5261e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 144 training return: tensor(1.8401e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 145 training return: tensor(9.1218e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 146 training return: tensor(2.9822e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 147 training return: tensor(3.2073e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 37 test_true_pfm: -594 test_simulate_pfm tensor(1.8631e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 148 training return: tensor(5.8811e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 149 training return: tensor(4.0044e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 150 training return: tensor(6.6846e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 151 training return: tensor(7.9353e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 38 test_true_pfm: -596 test_simulate_pfm tensor(1.2028e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 152 training return: tensor(9.2850e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 153 training return: tensor(1.8058e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 154 training return: tensor(4.8913e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 155 training return: tensor(8.2243e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 39 test_true_pfm: -596 test_simulate_pfm tensor(1.6205e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 156 training return: tensor(1.4353e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 157 training return: tensor(1.8521e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 158 training return: tensor(3.6375e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 159 training return: tensor(2.8886e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 40 test_true_pfm: -595 test_simulate_pfm tensor(3.0834e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 160 training return: tensor(3.4314e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 161 training return: tensor(1.9074e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 162 training return: tensor(5.2883e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 163 training return: tensor(1.1361e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 41 test_true_pfm: -594 test_simulate_pfm tensor(6.1629e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 164 training return: tensor(2.5792e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 165 training return: tensor(1.0485e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 166 training return: tensor(2.7837e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 167 training return: tensor(2.4904e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 42 test_true_pfm: -597 test_simulate_pfm tensor(7.0204e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 168 training return: tensor(2.4471e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 169 training return: tensor(1.6525e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 170 training return: tensor(1.2870e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 171 training return: tensor(2.9005e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 43 test_true_pfm: -597 test_simulate_pfm tensor(1.3988e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 172 training return: tensor(3.8666e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 173 training return: tensor(3.2044e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 174 training return: tensor(1.2177e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 175 training return: tensor(7.8186e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 44 test_true_pfm: -597 test_simulate_pfm tensor(2.9160e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 176 training return: tensor(4.0267e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 177 training return: tensor(9.8306e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 178 training return: tensor(2.8956e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 179 training return: tensor(5.6074e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 45 test_true_pfm: -596 test_simulate_pfm tensor(4.0974e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 180 training return: tensor(5.2191e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 181 training return: tensor(5.2688e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 182 training return: tensor(1.6193e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 183 training return: tensor(1.5120e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 46 test_true_pfm: -596 test_simulate_pfm tensor(9.1025e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 184 training return: tensor(1.6013e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 185 training return: tensor(1.9683e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 186 training return: tensor(2.2381e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 187 training return: tensor(1.2874e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 47 test_true_pfm: -596 test_simulate_pfm tensor(1.1853e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 188 training return: tensor(2.1055e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 189 training return: tensor(2.0197e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 190 training return: tensor(6.3821e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 191 training return: tensor(2.9665e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 48 test_true_pfm: -596 test_simulate_pfm tensor(1.3017e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 192 training return: tensor(5.9959e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 193 training return: tensor(1.5400e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 194 training return: tensor(2.1718e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 195 training return: tensor(5.9869e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 49 test_true_pfm: -596 test_simulate_pfm tensor(1.0574e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 196 training return: tensor(4.7251e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 197 training return: tensor(2.2232e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 198 training return: tensor(2.2440e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 199 training return: tensor(1.9775e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 50 test_true_pfm: -597 test_simulate_pfm tensor(6.1495e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 200 training return: tensor(1.0311e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 201 training return: tensor(1.1331e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 202 training return: tensor(4.2539e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 203 training return: tensor(6.2057e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 51 test_true_pfm: -596 test_simulate_pfm tensor(2.4220e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 204 training return: tensor(3.1382e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 205 training return: tensor(4.8835e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 206 training return: tensor(6.8037e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 207 training return: tensor(1.7590e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 52 test_true_pfm: -596 test_simulate_pfm tensor(5.7379e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 208 training return: tensor(8.1905e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 209 training return: tensor(9.4731e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 210 training return: tensor(2.6444e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 211 training return: tensor(3.4215e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 53 test_true_pfm: -597 test_simulate_pfm tensor(2.4071e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 212 training return: tensor(1.1131e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 213 training return: tensor(1.0414e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 214 training return: tensor(3.8688e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 215 training return: tensor(3.0302e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 54 test_true_pfm: -596 test_simulate_pfm tensor(1.0368e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 216 training return: tensor(9.8364e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 217 training return: tensor(5.5733e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 218 training return: tensor(1.5647e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 219 training return: tensor(1.0888e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 55 test_true_pfm: -596 test_simulate_pfm tensor(1.1263e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 220 training return: tensor(1.7966e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 221 training return: tensor(3.2583e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 222 training return: tensor(3.9453e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 223 training return: tensor(1.0445e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 56 test_true_pfm: -596 test_simulate_pfm tensor(2.0898e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 224 training return: tensor(1.5763e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 225 training return: tensor(4.7604e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 226 training return: tensor(2.8418e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 227 training return: tensor(8.3861e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 57 test_true_pfm: -597 test_simulate_pfm tensor(1.0187e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 228 training return: tensor(6.6120e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 229 training return: tensor(3.2338e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 230 training return: tensor(2.8636e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 231 training return: tensor(2.1825e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 58 test_true_pfm: -597 test_simulate_pfm tensor(4.0241e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 232 training return: tensor(5.3699e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 233 training return: tensor(9.1566e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 234 training return: tensor(1.0412e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 235 training return: tensor(7.9846e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 59 test_true_pfm: -597 test_simulate_pfm tensor(4.2991e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 236 training return: tensor(1.1207e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 237 training return: tensor(5.9943e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 238 training return: tensor(1.3379e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 239 training return: tensor(7.4899e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 60 test_true_pfm: -596 test_simulate_pfm tensor(1.5345e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 240 training return: tensor(3.4551e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 241 training return: tensor(2.5245e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 242 training return: tensor(5.1374e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 243 training return: tensor(1.1251e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 61 test_true_pfm: -596 test_simulate_pfm tensor(2.1766e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 244 training return: tensor(6.3305e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 245 training return: tensor(8.5908e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 246 training return: tensor(5.3024e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 247 training return: tensor(7.8962e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 62 test_true_pfm: -597 test_simulate_pfm tensor(3.6062e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 248 training return: tensor(4.9098e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 249 training return: tensor(7.3825e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 250 training return: tensor(5.7733e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 251 training return: tensor(1.1639e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 63 test_true_pfm: -596 test_simulate_pfm tensor(3.2945e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 252 training return: tensor(1.6375e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 253 training return: tensor(8.4914e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 254 training return: tensor(7.8794e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 255 training return: tensor(9.1381e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 64 test_true_pfm: -596 test_simulate_pfm tensor(4.4290e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 256 training return: tensor(5.6431e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 257 training return: tensor(2.4914e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 258 training return: tensor(5.4398e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 259 training return: tensor(2.6682e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 65 test_true_pfm: -596 test_simulate_pfm tensor(5.7469e-22, device='cuda:0', grad_fn=<DivBackward0>)
episode: 260 training return: tensor(1.8449e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 261 training return: tensor(5.8687e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 262 training return: tensor(1.2232e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 263 training return: tensor(7.7595e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 66 test_true_pfm: -596 test_simulate_pfm tensor(1.7952e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 264 training return: tensor(1.1461e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 265 training return: tensor(3.8034e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 266 training return: tensor(3.7160e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 267 training return: tensor(1.3904e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 67 test_true_pfm: -593 test_simulate_pfm tensor(5.2284e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 268 training return: tensor(7.3767e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 269 training return: tensor(6.2886e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 270 training return: tensor(4.5311e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 271 training return: tensor(5.5374e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 68 test_true_pfm: -596 test_simulate_pfm tensor(4.1912e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 272 training return: tensor(1.4561e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 273 training return: tensor(9.1812e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 274 training return: tensor(3.4477e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 275 training return: tensor(2.4227e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 69 test_true_pfm: -596 test_simulate_pfm tensor(1.3323e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 276 training return: tensor(2.3557e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 277 training return: tensor(1.3270e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 278 training return: tensor(3.2272e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 279 training return: tensor(8.2904e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 70 test_true_pfm: -596 test_simulate_pfm tensor(4.0919e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 280 training return: tensor(5.9630e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 281 training return: tensor(5.5692e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 282 training return: tensor(3.1442e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 283 training return: tensor(1.5915e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 71 test_true_pfm: -597 test_simulate_pfm tensor(4.4313e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 284 training return: tensor(2.8688e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 285 training return: tensor(1.8135e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 286 training return: tensor(4.8937e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 287 training return: tensor(1.8603e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 72 test_true_pfm: -596 test_simulate_pfm tensor(2.4694e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 288 training return: tensor(8.1089e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 289 training return: tensor(1.7295e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 290 training return: tensor(4.6376e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 291 training return: tensor(7.5655e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 73 test_true_pfm: -596 test_simulate_pfm tensor(2.9801e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 292 training return: tensor(1.9969e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 293 training return: tensor(3.3971e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 294 training return: tensor(1.8384e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 295 training return: tensor(1.6818e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 74 test_true_pfm: -596 test_simulate_pfm tensor(9.6438e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 296 training return: tensor(3.6424e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 297 training return: tensor(4.7007e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 298 training return: tensor(7.0211e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 299 training return: tensor(7.8322e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 75 test_true_pfm: -596 test_simulate_pfm tensor(6.1331e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 300 training return: tensor(6.3097e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 301 training return: tensor(5.3611e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 302 training return: tensor(2.9409e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 303 training return: tensor(2.6652e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 76 test_true_pfm: -596 test_simulate_pfm tensor(4.5305e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 304 training return: tensor(1.0800e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 305 training return: tensor(5.0534e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 306 training return: tensor(2.7639e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 307 training return: tensor(6.8599e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 77 test_true_pfm: -596 test_simulate_pfm tensor(1.4665e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 308 training return: tensor(5.2468e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 309 training return: tensor(4.3780e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 310 training return: tensor(2.5739e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 311 training return: tensor(2.5540e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 78 test_true_pfm: -596 test_simulate_pfm tensor(3.5742e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 312 training return: tensor(1.8065e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 313 training return: tensor(2.9237e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 314 training return: tensor(4.5890e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 315 training return: tensor(9.8121e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 79 test_true_pfm: -596 test_simulate_pfm tensor(1.2372e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 316 training return: tensor(1.1002e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 317 training return: tensor(7.7883e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 318 training return: tensor(1.1341e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 319 training return: tensor(5.7529e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 80 test_true_pfm: -597 test_simulate_pfm tensor(4.8122e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 320 training return: tensor(1.0632e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 321 training return: tensor(7.9803e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 322 training return: tensor(1.5385e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 323 training return: tensor(7.0944e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 81 test_true_pfm: -596 test_simulate_pfm tensor(2.8415e-21, device='cuda:0', grad_fn=<DivBackward0>)
